From 7ae9f2ab75d6de161f64851941b375f0e3cb3d49 Mon Sep 17 00:00:00 2001 From: Elizabeth <hashim.sharif91@gmail.com> Date: Sat, 16 Nov 2019 18:50:51 -0600 Subject: [PATCH] Changed all instances of fp32 to fp16 except for the fp32 baseline --- .../autotuner_data/tuner_confs_batch220.txt | 1176 +- .../tuner_pareto_confs_batch220.txt | 518 +- .../tuner_promise_confs_batch220_multi.txt | 12984 ++++++------- .../tuner_promise_confs_batch220_single.txt | 5642 +++--- .../autotuner_data/tuner_confs_batch220.txt | 2574 +-- .../tuner_pareto_confs_batch220.txt | 686 +- .../tuner_promise_confs_batch220_multi.txt | 15840 ++++++++-------- .../tuner_promise_confs_batch220_single.txt | 8108 ++++---- .../autotuner_data/tuner_confs_batch220.txt | 3284 ++-- .../tuner_pareto_confs_batch220.txt | 1110 +- .../tuner_promise_confs_batch220_multi.txt | 13614 ++++++------- .../tuner_promise_confs_batch220_single.txt | 5420 +++--- .../autotuner_data/tuner_confs_batch220.txt | 400 +- .../tuner_pareto_confs_batch220.txt | 340 +- .../tuner_promise_confs_batch220_multi.txt | 2014 +- .../tuner_promise_confs_batch220_single.txt | 1434 +- .../autotuner_data/tuner_confs_batch220.txt | 152 +- .../tuner_pareto_confs_batch220.txt | 120 +- .../tuner_promise_confs_batch220_multi.txt | 504 +- .../tuner_promise_confs_batch220_single.txt | 196 +- .../autotuner_data/tuner_confs_batch220.txt | 3266 ++-- .../tuner_pareto_confs_batch220.txt | 2438 +-- .../tuner_promise_confs_batch220_multi.txt | 3766 ++-- .../tuner_promise_confs_batch220_single.txt | 914 +- .../autotuner_data/tuner_confs_batch220.txt | 13440 ++++++------- .../tuner_pareto_confs_batch220.txt | 1984 +- .../tuner_promise_confs_batch220_multi.txt | 7284 +++---- .../tuner_promise_confs_batch220_single.txt | 7322 +++---- .../autotuner_data/tuner_confs_batch220.txt | 12448 ++++++------ .../tuner_pareto_confs_batch220.txt | 3168 ++-- .../tuner_promise_confs_batch220_multi.txt | 10540 +++++----- .../tuner_promise_confs_batch220_single.txt | 5996 +++--- 32 files changed, 74341 insertions(+), 74341 deletions(-) diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_confs_batch220.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_confs_batch220.txt index 4537eed23c..27ae10a437 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_confs_batch220.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_confs_batch220.txt @@ -10,841 +10,841 @@ conf1 1 0 78.75 0 ----- +++++ conf1 1.58691558324 0 78.559998 0.5900020000000069 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf2 1.561580129 0 78.279991 0.7050135000000068 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf3 1.58691558324 0 78.480003 0.6699970000000036 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf4 1.62843286633 0 78.139999 0.9150014999999954 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf5 1.561580129 0 78.199997 0.8250045000000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf6 1.5 0 78.840004 0.3099960000000067 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf7 1.63231514248 0 78.180008 0.8549879999999987 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf8 1.561580129 0 78.400002 0.524996999999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf9 1.62843286633 0 78.219994 0.7950090000000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf10 1.68452312305 0 78.55999 0.5900100000000009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf11 1.59477017142 0 78.139999 0.9150014999999954 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf12 1.58691558324 0 78.480003 0.6699970000000036 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf13 1.58691558324 0 78.400002 0.524996999999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf14 1.63231514248 0 78.119995 0.9450074999999956 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf15 1.58691558324 0 78.559998 0.5900020000000069 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf16 1.58691558324 0 78.400009 0.5249865000000042 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf17 1.540499209 0 78.139999 0.9150014999999954 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf18 1.70550344452 0 77.979996 1.1550060000000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf19 1.60552156231 0 77.719994 1.5450090000000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 35 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 35 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf20 1.7855629355 0 77.639999 1.6650014999999954 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf21 1.69337635738 0 77.599998 1.725003000000001 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf22 1.69337635738 0 77.68 1.6049999999999898 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf23 1.70550344452 0 77.580002 1.7549970000000101 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 35 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 35 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf24 1.80498002224 0 77.479996 1.9050060000000002 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf25 1.7496306648 0 78.060005 1.0349924999999942 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf26 1.80498002224 0 77.500008 1.874988000000009 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf27 1.69337635738 0 77.639999 1.6650014999999954 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf28 1.7855629355 0 77.639999 1.6650014999999954 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf29 1.81876478645 0 77.499992 1.875011999999991 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 35 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 35 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf30 1.7357694593 0 78.040001 1.0649984999999944 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf31 1.81876478645 0 77.580002 1.7549970000000101 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 35 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 35 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf32 1.73138014145 0 77.879997 1.3050044999999955 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf33 1.7357694593 0 78.040001 1.0649984999999944 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf34 1.59477017142 0 78.139999 0.9150014999999954 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf35 1.63231514248 0 77.939995 1.2150075000000058 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf36 1.7855629355 0 77.5 1.875 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf37 1.62843286633 0 78.219994 0.7950090000000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf38 1.80498002224 0 77.540001 1.8149984999999944 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf39 1.69337635738 0 77.860001 1.3349985000000046 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf40 1.69337635738 0 77.659996 1.63500599999999 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf41 1.64654492165 0 77.219994 2.2950090000000003 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf42 1.70550344452 0 77.659996 1.63500599999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf43 1.63426323052 0 76.900002 2.774996999999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf44 1.85072010812 0 77.339996 2.115006000000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf45 1.70763025603 0 76.860001 2.8349985000000046 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf46 1.75805416249 0 77.139999 2.4150014999999954 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf47 1.80736234275 0 76.759995 2.9850074999999947 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf48 1.70550344452 0 77.040001 2.5649984999999944 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf49 1.65600317448 0 77.619995 1.6950074999999956 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf50 1.75498605481 0 77.180008 2.3549879999999987 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf51 1.85825452695 0 77.340004 2.11499400000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf52 1.8290896189 0 77.199997 2.3250045000000057 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf53 1.68659789846 0 77.139999 2.4150014999999954 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf54 1.85072010812 0 77.139999 2.4150014999999954 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf55 1.65800824851 0 77.080002 2.50499700000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf56 1.98484848485 0 77.300003 2.1749954999999943 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf57 1.70550344452 0 77.379997 2.0550044999999955 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf58 1.58875675284 0 76.819992 2.8950120000000013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf59 1.85072010812 0 76.959999 2.6850015000000056 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf60 1.70550344452 0 77.259995 2.2350074999999947 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf61 1.90544418837 0 76.959999 2.6850015000000056 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf62 1.98484848485 0 76.900002 2.774996999999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf63 1.64654492165 0 76.779999 2.9550014999999945 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf64 1.7451710542 0 76.840004 2.86499400000001 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf65 1.80736234275 0 76.979996 2.655006 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf66 1.98484848485 0 77.520004 1.8449939999999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf67 1.90544418837 0 76.919998 2.74500299999999 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf68 1.62330389945 0 76.940002 2.7149969999999897 -1 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf69 1.733572022 0 77.980003 1.1549955000000054 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf70 1.82847903192 0 77.279999 2.2050014999999945 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf71 1.75805416249 0 76.880005 2.8049925000000044 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf72 1.79728066937 0 77.019997 2.5950044999999946 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf73 1.90544418837 0 76.780006 2.9549909999999997 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf74 1.91812212738 0 76.900002 2.774996999999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf75 1.80736234275 0 77.0 2.625 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf76 1.85072010812 0 77.139999 2.4150014999999954 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf77 1.60036156459 0 77.479996 1.9050060000000002 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf78 1.69547301219 0 77.339996 2.115006000000001 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf79 1.80736234275 0 77.340004 2.11499400000001 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf80 1.64654492165 0 76.900002 2.774996999999999 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf81 1.70550344452 0 76.940002 2.7149969999999897 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf82 1.70550344452 0 76.759995 2.9850074999999947 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf83 1.75805416249 0 76.82 2.8950000000000102 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf84 1.91009546227 0 76.779991 2.955013500000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_pareto_confs_batch220.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_pareto_confs_batch220.txt index 6bad071981..31885ce6fb 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_pareto_confs_batch220.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_pareto_confs_batch220.txt @@ -10,371 +10,371 @@ conf1 1 0 78.75 0 ----- +++++ conf1 1.58691558324 0 78.559998 0.5900020000000069 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf2 1.561580129 0 78.279991 0.7050135000000068 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf3 1.58691558324 0 78.480003 0.6699970000000036 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf4 1.62843286633 0 78.139999 0.9150014999999954 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf5 1.561580129 0 78.199997 0.8250045000000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf6 1.5 0 78.840004 0.3099960000000067 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf7 1.63231514248 0 78.180008 0.8549879999999987 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf8 1.561580129 0 78.400002 0.524996999999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf9 1.62843286633 0 78.219994 0.7950090000000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf10 1.68452312305 0 78.55999 0.5900100000000009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf11 1.59477017142 0 78.139999 0.9150014999999954 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf12 1.58691558324 0 78.480003 0.6699970000000036 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf13 1.58691558324 0 78.400002 0.524996999999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf14 1.63231514248 0 78.119995 0.9450074999999956 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf15 1.58691558324 0 78.559998 0.5900020000000069 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf16 1.58691558324 0 78.400009 0.5249865000000042 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf17 1.540499209 0 78.139999 0.9150014999999954 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf18 1.70550344452 0 77.979996 1.1550060000000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf19 1.7855629355 0 77.639999 1.6650014999999954 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf20 1.80498002224 0 77.479996 1.9050060000000002 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf21 1.7496306648 0 78.060005 1.0349924999999942 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf22 1.80498002224 0 77.500008 1.874988000000009 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf23 1.7855629355 0 77.639999 1.6650014999999954 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf24 1.81876478645 0 77.499992 1.875011999999991 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 35 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 35 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf25 1.7357694593 0 78.040001 1.0649984999999944 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf26 1.81876478645 0 77.580002 1.7549970000000101 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 35 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 35 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf27 1.73138014145 0 77.879997 1.3050044999999955 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf28 1.7357694593 0 78.040001 1.0649984999999944 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf29 1.59477017142 0 78.139999 0.9150014999999954 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf30 1.7855629355 0 77.5 1.875 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf31 1.62843286633 0 78.219994 0.7950090000000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf32 1.80498002224 0 77.540001 1.8149984999999944 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf33 1.70550344452 0 77.659996 1.63500599999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf34 1.98484848485 0 77.300003 2.1749954999999943 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf35 1.98484848485 0 76.900002 2.774996999999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf36 1.98484848485 0 77.520004 1.8449939999999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf37 1.733572022 0 77.980003 1.1549955000000054 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_promise_confs_batch220_multi.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_promise_confs_batch220_multi.txt index a4d65ba5ca..76d3b9342f 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_promise_confs_batch220_multi.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_promise_confs_batch220_multi.txt @@ -10,14251 +10,14251 @@ conf1 1 0 78.75 0 ----- +++++ conf1 1.79680900793 0 78.44000105 0.46499842499999033 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf2 2.14833063686 0 78.422500275 0.49124958749999337 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf3 2.3343756992 0 78.279499775 0.7057503374999925 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf4 2.0051937949 0 78.307999725 0.6630004124999971 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf5 2.09572432924 0 78.17899945 0.8565008249999906 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf6 2.1510303661 0 78.341000325 0.613499512500006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf7 1.59477017142 0 78.099998 0.975003000000001 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf8 2.40933283482 0 78.21199995 0.8070000749999906 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf9 2.44133377904 0 78.2895008 0.6907488000000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf10 2.10377358491 0 78.520998975 0.6290010250000023 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf11 1.99206710006 0 78.244000225 0.7589996625000097 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf12 2.49016877877 0 78.264999025 0.7275014625000082 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf13 1.92297058901 0 78.354999825 0.5925002625000104 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf14 3.28260543861 0 78.11449965 0.9532505250000014 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf15 2.1510303661 0 78.34250065 0.6112490249999922 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf16 2.70433982991 0 77.53800065 1.8179990249999989 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf17 2.50154135982 0 78.228999325 0.7815010124999944 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf18 2.97747502888 0 78.058500175 1.037249737499991 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf19 3.16135199481 0 77.658500425 1.6372493625000004 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf20 2.21882120614 0 78.016000725 1.1009989125000033 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf21 2.97779876864 0 77.5135 1.85475000000001 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf22 2.36542510121 0 77.562499475 1.7812507875000065 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf23 3.84266973008 0 77.68899955 1.5915006749999918 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf24 2.83830787766 0 77.668499175 1.6222512375000093 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf25 2.97779876864 0 77.433499775 1.974750337499998 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf26 2.99538587737 0 77.850000275 1.349999587500001 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf27 2.25834132162 0 78.0424999 1.0612501500000064 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf28 2.91566305143 0 77.9264999 1.235250150000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf29 3.45487014523 0 77.562000175 1.7819997375000085 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf30 2.98884796399 0 77.723499625 1.5397505624999965 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf31 3.59076159103 0 77.520999325 1.843501012499992 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf32 2.11760015464 0 77.5674998 1.7737503000000103 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf33 3.18710451669 0 77.66400005 1.6289999250000022 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf34 3.53240628779 0 77.666998925 1.6245016124999978 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf35 1.66455840456 0 77.519997 1.8450044999999946 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 35 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 35 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf36 3.72963656481 0 77.71250025 1.5562496249999924 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf37 3.26928242088 0 77.708 1.5630000000000024 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf38 2.91566305143 0 77.997500225 1.128749662500006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf39 3.37566443263 0 77.764000275 1.4789995874999988 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf40 2.16857363436 0 77.986500425 1.145249362499996 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf41 3.45051734728 0 77.757000325 1.4894995124999895 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf42 2.84007388684 0 77.906500025 1.2652499625000004 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf43 2.5234379499 0 77.836999325 1.3695010125000096 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf44 1.99424293451 0 77.279999 2.2050014999999945 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf45 2.22694008233 0 77.5514999 1.797750150000006 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf46 2.51071449035 0 78.122998625 0.9405020625000091 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf47 3.70240971999 0 77.544499675 1.808250487500004 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf48 3.14863117051 0 77.64799975 1.6530003750000049 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf49 3.542000097 0 78.033500025 1.0747499625000074 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf50 3.11638574781 0 77.679000075 1.606499887499993 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf51 3.24180661389 0 78.1409995 0.9135007499999901 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf52 3.59736474838 0 77.7224991 1.5412513500000102 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf53 2.15509270644 0 78.2715006 0.717749100000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf54 2.07523892514 0 77.4340001 1.9739998499999913 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf55 2.89104044336 0 77.61650025 1.7002496249999979 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf56 2.40299088586 0 77.75 1.5 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf57 2.57467948341 0 77.748999225 1.5015011624999914 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf58 3.4918718623 0 77.708 1.5630000000000024 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf59 2.41549528692 0 77.5825003 1.75124954999999 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf60 3.11638574781 0 77.53900055 1.8164991750000041 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf61 3.59076159103 0 77.694499775 1.5832503375000044 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf62 3.45487014523 0 77.76799985 1.473000225000007 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf63 3.4918718623 0 77.537499975 1.8187500374999956 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf64 1.80498002224 0 77.779999 1.4550014999999945 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf65 2.16857363436 0 77.798499675 1.4272504874999967 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf66 2.64786391927 0 77.61000005 1.7099999250000053 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf67 3.59736474838 0 77.7579992 1.4880011999999994 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf68 3.25529306887 0 77.687499775 1.5937503374999906 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf69 2.86173769935 0 77.427999275 1.9830010874999928 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf70 2.38769738778 0 77.39400115 2.033998275000009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf71 4.89920842557 0 78.051000325 1.048499512499994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf72 4.79205397957 0 78.038000275 1.0679995874999975 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf73 3.09875539212 0 77.273500075 2.2147498874999982 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf74 3.31960879381 0 77.9119997 1.2570004500000067 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf75 4.19140856656 0 78.20000065 0.8249990249999897 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf76 3.37358534144 0 77.6074995 1.7137507499999955 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf77 2.85576960676 0 78.0505001 1.0492498500000096 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf78 2.81931594457 0 77.081999925 2.502000112499992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf79 4.27983318032 0 78.214000325 0.803999512499999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf80 2.53347556111 0 77.053999875 2.5440001874999965 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf81 3.44574192026 0 78.04700015 1.0544997749999965 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf82 3.84374835529 0 77.871499425 1.3177508625000058 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf83 4.2459932947 0 78.186999875 0.8445001875000031 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf84 3.89292123452 0 77.7725 1.4662500000000094 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf85 3.84374835529 0 77.8894992 1.2907511999999954 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf86 3.23567852554 0 78.021000575 1.0934991374999967 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf87 2.56431338585 0 78.252500825 0.7462487625000023 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf88 3.52603923588 0 78.007000675 1.1144989874999993 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf89 3.31960879381 0 77.615000125 1.7024998125000081 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf90 2.73868451644 0 76.974999125 2.662501312500005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf91 2.73485576623 0 77.002500775 2.621248837499998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf92 4.71776016882 0 78.118500375 0.9472494375000053 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf93 4.50808625337 0 78.1879999 0.8430001500000088 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf94 2.4335243744 0 77.39600035 2.0309994750000087 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf95 2.28224185165 0 77.94599965 1.2060005249999932 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf96 4.69511411122 0 78.0050009 1.1174986500000017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf97 4.52821122249 0 78.1555004 0.891749400000009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf98 2.94070867727 0 77.411500725 2.0077489125000056 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf99 3.51112161664 0 77.876499375 1.3102509375000082 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf100 3.91238549312 0 77.8210001 1.3934998499999907 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf101 3.43063493306 0 77.99849985 1.1272502249999974 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf102 2.61175481887 0 77.0305003 2.57924955 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf103 2.1510303661 0 78.399499725 0.5257504125000025 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf104 2.33079967234 0 77.677000425 1.6094993624999958 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf105 2.38915544409 0 77.3554993 2.091751049999992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf106 3.18821757541 0 77.948999775 1.2015003374999935 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf107 2.38769738778 0 77.397999975 2.028000037499993 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf108 3.30518638085 0 78.03400095 1.0739985749999903 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf109 3.18821757541 0 77.3555 2.0917499999999905 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf110 3.93148091658 0 78.255500075 0.7417498874999993 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf111 3.47768289392 0 77.605998825 1.7160017624999995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf112 4.35330525145 0 78.23450025 0.7732496250000054 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf113 2.16634367244 0 78.242499975 0.7612500374999982 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf114 4.49698282055 0 78.02350005 1.0897499250000067 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf115 3.76294547016 0 77.81649915 1.4002512750000022 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf116 3.10050944598 0 78.053000775 1.0454988374999985 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf117 3.76294547016 0 77.813500225 1.4047496625000022 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf118 2.24888375674 0 76.91599985 2.7510002249999914 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf119 2.90176147275 0 77.430000025 1.9799999625000027 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf120 4.36510071125 0 78.1990006 0.8264990999999924 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf121 2.73868451644 0 77.863499275 1.3297510875000071 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf122 2.14345461127 0 77.9280001 1.2329998499999917 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf123 2.58610616022 0 78.1555008 0.891748800000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf124 2.76355621145 0 77.811499725 1.4077504124999933 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf125 3.1705014109 0 76.864499725 2.8282504124999974 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf126 4.59661176964 0 77.974500725 1.163248912500002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf127 3.46580033852 0 77.617499075 1.6987513875000033 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf128 3.10050944598 0 77.380999775 2.0535003374999903 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf129 3.4869813633 0 77.989000375 1.141499437499995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf130 4.82158183139 0 78.06699995 1.024500075000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf131 3.26850208106 0 77.3024992 2.1712512000000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf132 2.91566305143 0 77.826999475 1.3845007875000093 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf133 3.89292123452 0 77.9070005 1.2644992500000072 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf134 2.22332436958 0 77.196999925 2.3295001125 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf135 3.96334183535 0 77.79850045 1.4272493249999911 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf136 4.27315546194 0 78.25700055 0.7394991749999988 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf137 1.60552156231 0 78.099998 0.975003000000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf138 3.6515641406 0 77.886500175 1.2952497375000078 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf139 4.33470175092 0 78.1860006 0.8459991000000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf140 4.00390160999 0 78.03400095 1.0739985749999903 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf141 3.06923723471 0 77.48800035 1.8929994750000105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf142 2.51025423632 0 77.7379995 1.5180007499999988 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf143 2.2071979046 0 77.258999675 2.2365004875000025 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf144 2.24152130544 0 78.266000775 0.7259988375000077 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf145 1.73138014145 0 78.139999 0.9150014999999954 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf146 2.07194193579 0 78.4889999 0.661000100000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf147 1.95369833697 0 78.272500375 0.7162494374999895 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf148 2.48204433972 0 78.247500575 0.7537491374999945 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf149 2.6038089013 0 78.27349985 0.7147502250000102 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf150 2.28186154535 0 78.229499425 0.7807508624999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf151 1.59477017142 0 78.099998 0.975003000000001 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf152 1.82360897249 0 78.5300003 0.6199997000000025 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf153 1.76439678846 0 78.203000575 0.8204991374999935 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf154 1.72755765819 0 78.1744998 0.86325029999999 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf155 2.10377358491 0 78.471999975 0.6780000249999972 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf156 2.89256716615 0 78.17850095 0.8572485749999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf157 2.18361621336 0 78.29599945 0.6810008250000052 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf158 1.561580129 0 78.119995 0.9450074999999956 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf159 1.95369833697 0 78.40800075 0.512998875000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf160 2.77391276825 0 78.2810004 0.7034994000000054 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf161 2.10636242153 0 78.433500125 0.4747498125000078 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf162 1.73138014145 0 78.159996 0.88500599999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf163 2.16634367244 0 78.23250085 0.7762487250000092 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf164 2.53160205673 0 78.22899975 0.7815003750000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf165 1.75142089738 0 78.1519998 0.897000300000002 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf166 3.59736474838 0 77.738998525 1.5165022125000007 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf167 2.55188860547 0 78.203000475 0.8204992875000059 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf168 2.9094681628 0 77.8784995 1.3072507499999944 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf169 2.12252361347 0 78.12700005 0.9344999249999901 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf170 3.77643621697 0 77.486499575 1.8952506375000056 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf171 3.72963656481 0 77.683000175 1.6004997374999945 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf172 3.49811600913 0 77.6029999 1.7205001499999995 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf173 2.5234379499 0 77.871999325 1.3170010124999934 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf174 1.82360897249 0 78.489000675 0.6609993250000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf175 2.15086143425 0 77.46950055 1.9207491749999903 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf176 3.11638574781 0 77.4890009 1.8914986500000097 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf177 3.35910310464 0 77.72399955 1.539000674999997 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf178 2.60281905984 0 77.490499375 1.8892509375000017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf179 2.24796080644 0 77.690000325 1.5899995125000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf180 3.25529306887 0 77.705499025 1.566751462500008 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf181 3.46799468161 0 77.7030001 1.5704998500000045 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf182 2.63411159335 0 77.6099998 1.7100003000000044 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf183 2.99538587737 0 77.82599925 1.386001125000007 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf184 3.44574192026 0 78.058 1.0379999999999896 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf185 3.33253479352 0 78.095999875 0.9810001874999941 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf186 3.49811600913 0 77.75849985 1.4872502249999897 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf187 2.35829610893 0 77.731499975 1.5277500374999917 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf188 3.04907663832 0 78.093499975 0.9847500374999996 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf189 2.99538587737 0 77.74999995 1.5000000749999955 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf190 3.542000097 0 78.0260006 1.085999099999995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf191 3.28812300212 0 77.769999725 1.4700004124999921 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf192 2.87133870651 0 77.894500375 1.2832494374999897 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf193 2.73950635808 0 77.528499225 1.8322511624999933 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf194 2.91566305143 0 77.9804998 1.154250299999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf195 1.7977525785 0 78.559999475 0.5900005250000021 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf196 2.24152130544 0 78.173999375 0.8640009375000091 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf197 3.20989056068 0 78.09300135 0.9854979750000084 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf198 3.48564996739 0 78.1090001 0.9614998499999956 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf199 2.89256716615 0 78.160499175 0.8842512375000027 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf200 2.25834132162 0 78.08599985 0.9960002250000102 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf201 2.97779876864 0 77.515499675 1.8517504874999986 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf202 3.3268673034 0 78.0785003 1.0072495499999974 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf203 3.06580144126 0 77.74699955 1.5045006750000027 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf204 3.02884415997 0 78.163000075 0.880499887500001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf205 2.15594095941 0 77.552500875 1.7962486874999897 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf206 3.66721064524 0 77.7285002 1.5322497000000013 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf207 2.11760015464 0 77.764000125 1.4789998125000068 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf208 2.59640338923 0 78.2045001 0.8182498499999937 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf209 3.59736474838 0 77.5884993 1.7422510500000072 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf210 2.52227594543 0 77.390499625 2.039250562499994 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf211 2.72018747284 0 77.93899955 1.2165006749999918 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf212 2.97779876864 0 77.454499975 1.9432500374999933 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf213 2.89104044336 0 77.838500175 1.3672497375000106 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf214 2.21882120614 0 78.002499375 1.121250937500001 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf215 2.55046272045 0 77.49200035 1.8869994750000032 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf216 3.16135199481 0 77.674499775 1.6132503374999985 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf217 2.6038089013 0 77.727500675 1.5337489874999974 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf218 3.77643621697 0 77.722000675 1.5419989874999942 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf219 2.74609889077 0 77.9474999 1.2037501500000047 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf220 3.39701846598 0 77.776500325 1.460249512499999 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf221 3.84266973008 0 77.48100015 1.9034997750000002 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf222 2.41443054714 0 77.704000325 1.5689995125000067 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf223 2.13476659554 0 78.07549955 1.0117506749999947 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf224 3.72963656481 0 77.4985005 1.8772492499999913 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf225 1.95369833697 0 77.791000025 1.4384999624999963 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf226 2.57467948341 0 77.77599975 1.4610003750000047 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf227 3.84266973008 0 77.6624994 1.6312508999999977 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf228 3.06580144126 0 77.5534998 1.794750300000004 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf229 2.25536569046 0 77.7035 1.569749999999992 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf230 2.41443054714 0 77.626499625 1.6852505625000092 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf231 3.24718776399 0 78.105000425 0.9674993624999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf232 3.28260543861 0 78.098500625 0.9772490625000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf233 2.43612195203 0 78.246499875 0.7552501875000033 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf234 3.13780880773 0 76.97750055 2.658749174999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf235 3.3268673034 0 78.065999625 1.0260005624999948 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf236 2.56431338585 0 78.25149865 0.7477520249999969 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf237 4.58200012548 0 78.19099955 0.8385006749999988 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf238 4.82158183139 0 78.057500275 1.038749587500007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf239 3.542000097 0 78.080500425 1.0042493625000048 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf240 3.78112865648 0 77.95499995 1.192500074999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf241 4.59198750865 0 78.1270008 0.934498799999993 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf242 3.14537663121 0 78.0940003 0.983999549999993 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf243 3.52150537634 0 77.5499994 1.8000008999999935 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf244 3.13780880773 0 76.9555004 2.691749399999992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf245 4.44155567719 0 78.23850005 0.7672499250000016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf246 3.53377364881 0 77.57250005 1.7662499249999968 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf247 3.45138702919 0 78.04249975 1.061250374999993 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf248 2.68682935802 0 78.22300035 0.7904994749999901 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf249 3.48564996739 0 78.1284998 0.9322502999999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf250 3.06923723471 0 78.0770008 1.00949880000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf251 4.79877127275 0 78.10549985 0.9667502249999984 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf252 4.27315546194 0 78.240000375 0.7649994375000091 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf253 4.22633629753 0 78.162500175 0.8812497374999921 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf254 3.05997821259 0 78.132000325 0.9269995124999895 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf255 2.89256716615 0 78.13600105 0.9209984249999934 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf256 3.91238549312 0 77.926999475 1.2345007874999965 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf257 4.77617552809 0 78.092500275 0.9862495874999908 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf258 5.01218173084 0 78.113499925 0.9547501125000011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf259 2.88586161931 0 78.1765001 0.8602498500000024 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf260 3.6350853616 0 77.53449935 1.8232509749999934 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf261 2.97747502888 0 78.116499725 0.9502504125000044 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf262 2.28262228473 0 77.182000175 2.3519997375000017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf263 3.06923723471 0 77.48500025 1.8974996250000018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf264 3.51112161664 0 77.910499175 1.2592512375000027 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf265 2.85576960676 0 78.001500175 1.1227497374999942 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf266 2.48881101405 0 78.018000625 1.0979990625000013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf267 4.60279195815 0 78.146000725 0.9059989125000101 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf268 2.12995187868 0 77.7119994 1.5570009000000056 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf269 3.13493539756 0 77.98199935 1.152000975000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf270 2.89378970586 0 78.0650002 1.0274997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf271 2.27126418908 0 77.665500325 1.6267495125000053 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf272 3.6583930271 0 77.962000625 1.1819990624999974 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf273 3.44574192026 0 78.068000425 1.0229993625000091 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf274 4.37206912378 0 78.17450055 0.8632491749999929 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf275 3.22311222914 0 77.244499575 2.258250637499991 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf276 4.19140856656 0 78.261499975 0.73275003749999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf277 2.72831704128 0 77.88249915 1.3012512749999985 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf278 2.87888443598 0 77.3114994 2.1577508999999964 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf279 3.23567852554 0 78.033500475 1.0747492875000049 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf280 4.623773346 0 78.131000475 0.92849928750001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf281 3.18636290921 0 77.2869997 2.1945004500000067 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf282 3.02884415997 0 78.127999625 0.9330005624999984 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf283 2.33079967234 0 77.693999825 1.5840002624999912 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf284 3.31800009086 0 77.2995003 2.175749549999992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf285 2.54123316747 0 77.18099925 2.353501125000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf286 4.20879996926 0 78.112000075 0.9569998875000039 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf287 3.91238549312 0 77.849500425 1.3507493624999967 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf288 3.23567852554 0 77.448000525 1.9529992125000035 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf289 4.36510071125 0 78.22050075 0.794248875000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf290 2.24152130544 0 78.317000025 0.649499962500002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf291 2.0941218638 0 78.053001275 1.0454980875000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf292 2.48971602595 0 77.795999775 1.4310003375000022 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf293 2.35323022394 0 77.025 2.5874999999999915 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf294 2.61724582801 0 78.211499625 0.8077505624999972 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf295 3.04365492811 0 77.019500125 2.59574981250001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf296 2.95498685009 0 77.960999875 1.1835001875000017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf297 4.60279195815 0 78.144500275 0.9082495875000021 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf298 3.11780484681 0 77.582999275 1.7505010874999911 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf299 2.18431284582 0 78.0480003 1.0529995500000027 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf300 4.69591915468 0 78.0425009 1.0612486500000102 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf301 2.7974298081 0 77.64500005 1.6574999250000104 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf302 3.96334183535 0 77.769499775 1.4707503375000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf303 2.24630140357 0 77.273000925 2.2154986124999922 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf304 2.48971602595 0 77.8425003 1.3612495500000037 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf305 4.90008498647 0 78.0524994 1.0462508999999969 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf306 4.06752993595 0 78.22199955 0.7920006749999899 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf307 3.42634295097 0 77.590999925 1.7385001124999917 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf308 2.75993575613 0 78.089500225 0.9907496625000078 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf309 2.18361621336 0 78.335000675 0.6224989874999949 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf310 2.73376380311 0 78.257501 0.7387484999999927 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf311 2.10377358491 0 78.460000625 0.6899993749999936 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf312 2.44133377904 0 78.269500125 0.72074981250001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf313 2.24152130544 0 78.299499075 0.6757513875000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf314 2.28186154535 0 78.26600015 0.7259997750000053 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf315 1.82360897249 0 78.52550125 0.6244987499999951 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf316 1.92405135546 0 78.307500225 0.6637496625000026 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf317 2.59394423726 0 78.3124998 0.6562503000000035 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf318 3.28260543861 0 78.185500225 0.8467496625000024 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf319 2.40933283482 0 78.194999875 0.8325001875000098 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf320 1.95369833697 0 78.36700095 0.5744985749999927 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf321 2.53160205673 0 78.238499775 0.767250337500009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf322 2.20630884648 0 77.766000125 1.4759998124999925 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf323 2.20630884648 0 77.511499375 1.8577509375000005 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf324 3.59076159103 0 77.6839995 1.5990007500000019 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf325 2.46129952706 0 77.57799985 1.7580002250000035 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf326 2.43612195203 0 78.212499475 0.806250787499998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf327 2.97779876864 0 77.555500275 1.7917495875 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf328 3.35910310464 0 77.75199935 1.4970009749999917 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf329 2.51071449035 0 78.129999325 0.9300010125 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf330 2.80373024506 0 77.904000875 1.2689986875000088 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf331 2.36440403604 0 78.2385001 0.767249850000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf332 2.11760015464 0 77.418999675 1.9965004875000076 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf333 2.9094681628 0 77.7544996 1.493250599999996 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf334 3.49811600913 0 77.752500125 1.4962498125000039 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf335 2.46129952706 0 77.653499775 1.6447503374999997 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf336 2.41230388109 0 77.85100005 1.3484999250000058 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf337 2.31738854514 0 77.7519991 1.4970013499999908 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf338 2.64786391927 0 77.538499825 1.8172502624999964 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf339 3.16135199481 0 77.70349975 1.569750374999991 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf340 2.52227594543 0 77.38650035 2.0452494749999914 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf341 2.55188860547 0 78.20650025 0.8152496249999928 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf342 1.96561700982 0 77.71699945 1.5495008249999955 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf343 3.84266973008 0 77.680000075 1.604999887500007 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf344 2.70433982991 0 77.582999775 1.750500337499993 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf345 3.82389130321 0 77.602500675 1.7212489874999974 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf346 3.59736474838 0 77.740500675 1.5142489874999896 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf347 2.99342150206 0 77.7940007 1.433998950000003 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf348 2.81381236756 0 77.83299985 1.3755002250000103 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf349 1.80023417279 0 77.599998 1.725003000000001 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf350 2.44133377904 0 78.330501225 0.6292481624999908 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf351 3.20613284165 0 78.161000225 0.8834996625000073 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf352 1.85888516523 0 77.180008 2.3549879999999987 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf353 3.84266973008 0 77.5400003 1.814999549999996 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf354 2.99342150206 0 77.521999425 1.8420008624999937 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf355 3.35910310464 0 77.586499975 1.745250037500007 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf356 3.45487014523 0 77.69100055 1.5884991750000026 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf357 3.16135199481 0 77.4395008 1.965748799999993 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf358 2.31347341745 0 77.3290006 2.131499099999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf359 1.952305636 0 78.136499725 0.9202504125000104 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf360 3.33253479352 0 78.1575004 0.8887493999999947 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf361 3.49811600913 0 77.593998925 1.734001612500009 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf362 3.02884415997 0 78.183499725 0.8497504124999935 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf363 1.69337635738 0 77.660004 1.634993999999999 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf364 3.44314270897 0 77.625999025 1.686001462500002 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf365 3.59076159103 0 77.5799995 1.7550007500000007 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf366 2.73376380311 0 78.233000525 0.7754992125000086 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf367 1.99119809512 0 77.959000225 1.1864996625000046 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf368 1.99119809512 0 77.9495002 1.2007496999999958 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf369 2.91566305143 0 77.889000725 1.2914989124999963 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf370 1.89227879259 0 77.919000675 1.24649898749999 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf371 2.52903050801 0 77.842999125 1.3605013124999914 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf372 2.9094681628 0 77.85900075 1.3364988749999895 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf373 2.13476659554 0 78.0249998 1.087500299999995 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf374 2.20915635682 0 78.125500425 0.9367493625000023 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf375 3.24718776399 0 78.100999375 0.973500937499999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf376 3.0068963151 0 78.096499225 0.9802511624999966 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf377 3.542000097 0 78.04100075 1.0634988750000076 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf378 2.19745749962 0 77.652999675 1.6455004874999943 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf379 2.99342150206 0 77.650999025 1.6485014624999934 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf380 1.9453718091 0 77.995001075 1.132498387499993 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf381 3.70240971999 0 77.7160008 1.550998799999995 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf382 2.76271987895 0 77.802500325 1.4212495125000046 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf383 2.58317907967 0 77.9939991 1.1340013500000055 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf384 3.59736474838 0 77.74700055 1.5044991750000065 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf385 3.18636290921 0 77.23649915 2.2702512749999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf386 3.31960879381 0 77.9584999 1.18725014999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf387 3.89292123452 0 77.87799985 1.3080002250000078 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf388 3.17822794726 0 77.42349975 1.9897503749999927 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf389 3.29564085979 0 78.0885006 0.9922490999999951 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf390 3.08305776402 0 78.231000325 0.7784995125000052 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf391 3.70341103092 0 78.023501225 1.089748162499994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf392 3.35622156523 0 77.801499775 1.4227503375000055 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf393 3.27045362948 0 77.406000875 2.0159986874999944 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf394 2.28262228473 0 77.06800045 2.5229993250000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf395 4.44155567719 0 78.2120006 0.8069991000000059 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf396 2.56479367867 0 76.86149995 2.832750075000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf397 2.7524465773 0 78.206000125 0.8159998124999959 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf398 3.04365492811 0 77.0369993 2.5695010499999924 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf399 3.6583930271 0 77.957500375 1.1887494375000074 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf400 4.50808625337 0 78.127499175 0.9337512375000045 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf401 3.69591437391 0 77.770499875 1.469250187500002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf402 2.07194193579 0 78.439500175 0.4657497374999906 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf403 2.18431284582 0 77.059500125 2.5357498125000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf404 4.59661176964 0 78.074500025 1.0132499624999909 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf405 3.1808118349 0 78.0770005 1.0094992500000046 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf406 2.88343094032 0 77.0700001 2.519999849999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf407 2.28186154535 0 78.2985008 0.677248800000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf408 3.6515641406 0 77.918499575 1.2472506375000023 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf409 4.79205397957 0 78.042500725 1.0612489125000053 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf410 2.6262810908 0 77.943999425 1.2090008624999982 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf411 3.82870249017 0 77.869000425 1.3214993625000062 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf412 2.07823096988 0 77.89850065 1.2772490249999962 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf413 4.76952130091 0 78.0174999 1.0987501499999937 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf414 2.14731998471 0 77.153999725 2.3940004124999916 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf415 2.39521498147 0 78.326000725 0.6359989124999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf416 2.86173769935 0 77.4560001 1.9409998500000043 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf417 3.96334183535 0 77.82450025 1.3882496250000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf418 2.28224185165 0 78.00700015 1.1144997750000059 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf419 1.67627481734 0 77.660004 1.634993999999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf420 2.16634367244 0 78.299500075 0.6757498875000039 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf421 2.7974298081 0 77.60100005 1.7234999250000058 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf422 2.50108446251 0 78.310998725 0.6585019124999931 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf423 3.26850208106 0 77.2965002 2.1802497000000045 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf424 2.66150192539 0 77.7190008 1.5464987999999948 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf425 3.09350511825 0 77.018500075 2.5972498874999914 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf426 2.72831704128 0 78.066001 1.0259985 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf427 3.52467785267 0 78.01100045 1.108499325000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf428 2.59271640731 0 77.485999275 1.8960010875000037 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf429 2.49493264402 0 78.2500004 0.749999399999993 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf430 2.85576960676 0 78.026000425 1.0859993624999902 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf431 3.76294547016 0 77.828999675 1.3815004874999914 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf432 2.0941218638 0 77.1139997 2.4540004500000094 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf433 3.69591437391 0 77.8415009 1.3627486500000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf434 1.9453718091 0 78.101999725 0.9720004125000017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf435 3.89292123452 0 77.844500175 1.3582497375000102 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf436 5.09612029865 0 77.941999725 1.2120004124999966 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf437 2.89256716615 0 78.16550005 0.8767499249999915 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf438 4.71776016882 0 78.11850035 0.9472494749999925 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf439 2.8298030352 0 77.880999375 1.3035009374999973 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf440 3.95190382569 0 78.191000375 0.8384994374999977 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf441 3.47768289392 0 77.62299965 1.6905005250000045 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf442 3.12527637116 0 78.017000175 1.0994997374999897 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf443 2.67371407651 0 77.793501225 1.4347481625 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf444 3.21441146697 0 78.02099995 1.0935000749999944 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf445 3.76294547016 0 77.8419996 1.3620006000000089 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf446 1.93165087062 0 77.81500075 1.4024988750000063 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf447 2.51025423632 0 77.7459997 1.506000450000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf448 3.53377364881 0 77.5685011 1.772248349999991 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf449 2.8298030352 0 77.78100075 1.453498874999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf450 2.22422719659 0 77.464500875 1.9282486875000018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf451 3.09875539212 0 77.306499125 2.165251312499997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf452 2.6974145891 0 77.7034999 1.5697501500000044 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf453 2.22332436958 0 77.232499975 2.276250037500006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf454 4.98028095379 0 77.969999525 1.1700007124999914 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf455 2.13926887138 0 77.15200025 2.3969996249999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf456 2.28224185165 0 77.863500975 1.3297485375000093 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf457 2.52227594543 0 78.082000875 1.0019986874999915 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf458 2.17563676084 0 77.40750025 2.0137496250000027 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf459 3.01949394303 0 77.348998825 2.101501762500007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf460 4.28586099646 0 78.075500675 1.011748987499999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf461 2.29948783073 0 77.73800035 1.5179994750000105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf462 2.1510303661 0 78.33949995 0.615750074999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf463 2.43612195203 0 78.2609999 0.7335001499999976 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf464 2.06569084995 0 78.2925 0.686249999999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf465 2.28186154535 0 78.222999975 0.79050003750001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf466 1.95369833697 0 78.300999975 0.6735000375000055 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf467 1.95369833697 0 78.4525005 0.6974995000000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf468 2.67606536954 0 78.300500525 0.6742492124999941 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf469 1.88123041257 0 78.36049985 0.5842502250000052 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf470 2.70007394171 0 78.23150005 0.7777499250000091 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf471 2.47665743514 0 78.271999125 0.7170013125000096 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf472 1.561580129 0 78.240005 0.7649925000000053 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf473 1.95369833697 0 78.39050035 0.5392494750000054 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf474 3.16135199481 0 77.47150085 1.9177487250000027 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf475 2.48136969546 0 77.644499775 1.6582503375000002 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf476 3.63363848948 0 77.451000325 1.9484995125000069 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf477 3.39238987381 0 78.0874994 0.993750900000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf478 2.29428673152 0 77.837999875 1.3680001875000087 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf479 1.68618253461 0 78.6015003 0.5484997000000021 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf480 3.53240628779 0 77.46799955 1.9230006749999973 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf481 2.60281905984 0 77.489999525 1.8900007124999973 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf482 2.71748837209 0 78.0590003 1.0364995500000092 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf483 3.45051734728 0 77.798499675 1.4272504874999967 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf484 2.89409550228 0 77.4129992 2.0055011999999977 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf485 3.53925369518 0 77.535499825 1.8217502624999966 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf486 2.98396322778 0 77.8309997 1.37850044999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf487 2.25536569046 0 77.7144989 1.553251650000007 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf488 2.72858886384 0 77.523000675 1.8404989874999913 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf489 3.84266973008 0 77.6484993 1.6522510500000038 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf490 2.97779876864 0 77.46650125 1.9252481250000102 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf491 2.27522664257 0 77.9424997 1.2112504500000014 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf492 2.47665743514 0 78.257500325 0.7387495125000072 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf493 2.44133377904 0 78.30250085 0.6712487249999981 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf494 2.57467948341 0 77.8404998 1.3642502999999948 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf495 1.80023417279 0 77.540001 1.8149984999999944 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf496 3.58371362677 0 78.022000175 1.0919997374999966 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf497 1.63231514248 0 78.120003 0.9449955000000045 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf498 2.25834132162 0 78.007999825 1.1130002625000017 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf499 3.89181483916 0 77.499499375 1.8757509375000012 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf500 3.16500541712 0 77.777499725 1.4587504125000024 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf501 3.36364124845 0 77.960000225 1.1849996624999974 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf502 2.79400512644 0 77.946000025 1.2059999624999946 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf503 2.97779876864 0 77.51400035 1.8539994749999948 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf504 1.9769681931 0 77.7790005 1.4564992500000074 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf505 3.53240628779 0 77.670000425 1.6199993625000033 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf506 2.74609889077 0 77.556999225 1.7895011625000024 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf507 2.52227594543 0 78.120000025 0.9449999625000061 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf508 3.19453962237 0 77.60100015 1.7234997749999934 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf509 3.89181483916 0 77.56599925 1.7760011249999934 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf510 2.48881101405 0 78.157000425 0.8894993624999898 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf511 3.42248933877 0 78.10499935 0.9675009750000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf512 2.24980746522 0 77.93100035 1.2284994749999925 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf513 3.06580144126 0 77.490999725 1.888500412500008 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf514 1.76439678846 0 77.9490002 1.2014996999999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf515 1.93165087062 0 77.844499 1.3582515000000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf516 3.27671427503 0 78.0164993 1.10025104999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf517 3.63363848948 0 77.651499525 1.6477507124999917 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf518 3.59736474838 0 77.765499275 1.4767510875000056 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf519 2.67110637001 0 77.492998925 1.8855016125000077 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf520 3.03690484441 0 78.17399975 0.8640003750000105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf521 3.82389130321 0 77.6294996 1.680750599999996 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf522 3.02516396272 0 77.7979996 1.4280006000000043 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf523 2.82425847867 0 77.53849985 1.8172502250000093 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf524 1.92906574394 0 77.805499025 1.4167514624999953 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf525 3.30478754695 0 77.53349955 1.824750674999997 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf526 2.09572432924 0 77.886498975 1.2952515375000075 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf527 4.77617552809 0 78.134999625 0.9225005624999909 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf528 4.60279195815 0 78.176000775 0.8609988374999915 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf529 3.1705014109 0 76.8984998 2.7772503000000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf530 3.28260543861 0 78.1204998 0.9442502999999931 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf531 2.92157268012 0 77.9825014 1.1512478999999942 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf532 3.50662601431 0 78.0274999 1.0837501500000073 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf533 4.44155567719 0 78.16200105 0.881998424999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf534 3.82870249017 0 77.802999825 1.4205002624999992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf535 3.31800009086 0 77.221000125 2.293499812499995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf536 2.16634367244 0 78.24850025 0.7522496249999904 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf537 5.01218173084 0 78.046000675 1.055998987499997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf538 3.29564085979 0 78.073500675 1.014748987499992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf539 2.14664674472 0 78.373999475 0.5640007874999924 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf540 2.19763383051 0 77.306 2.166000000000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf541 2.66564670957 0 78.26750015 0.7237497749999946 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf542 2.63995156218 0 78.20250095 0.8212485749999985 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf543 3.27045362948 0 77.920999675 1.2435004874999933 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf544 3.6583930271 0 77.994000975 1.1339985374999912 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf545 2.79400512644 0 77.5879999 1.7430001500000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf546 3.04399322006 0 77.280999825 2.2035002625000075 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf547 2.83419571826 0 77.902499575 1.2712506375000103 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf548 3.50662601431 0 77.949499775 1.20075033749999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf549 2.57952953365 0 78.111000225 0.9584996625000031 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf550 4.13446115525 0 78.0035005 1.1197492499999981 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf551 2.42726970587 0 77.76500105 1.4774984250000074 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf552 3.99339287342 0 78.29699995 0.679500075 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf553 3.37358534144 0 77.605999575 1.7160006375000023 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf554 3.27045362948 0 77.372499225 2.066251162500002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf555 2.05715694099 0 77.576500125 1.7602498125000068 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf556 3.96334183535 0 77.767999875 1.4730001874999985 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf557 1.74018108892 0 78.051500175 1.0477497374999984 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf558 2.98396322778 0 77.902999825 1.2705002625000077 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf559 5.09612029865 0 77.92550035 1.2367494750000105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf560 4.21982434853 0 78.17400035 0.863999475 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf561 3.20613284165 0 78.136000875 0.9209986875000098 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf562 4.66234332773 0 78.073500575 1.0147491375000044 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf563 4.82158183139 0 78.126500525 0.935249212500004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf564 2.6038089013 0 77.893000175 1.285499737500004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf565 2.53347556111 0 77.075499525 2.511750712500003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf566 3.08305776402 0 78.165998625 0.8760020624999996 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf567 2.32921384149 0 77.71800025 1.5479996249999957 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf568 2.16019226029 0 77.293500325 2.184749512500005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf569 4.69591915468 0 78.1205 0.9442499999999896 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf570 3.92134841516 0 78.143499975 0.9097500375000038 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf571 2.0941218638 0 78.173500425 0.8647493624999996 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf572 4.87565925629 0 78.01400025 1.1039996250000073 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf573 4.14008616617 0 78.264500775 0.7282488374999971 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf574 2.37280288508 0 78.309000975 0.6614985374999947 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf575 4.89920842557 0 77.99249985 1.1362502249999977 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf576 3.6350853616 0 77.5270003 1.8344995500000039 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf577 4.43580061952 0 78.04050025 1.0642496250000093 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf578 2.06631426065 0 78.141999575 0.9120006375000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf579 3.96334183535 0 77.7960007 1.43099895000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf580 4.05728597619 0 78.16450025 0.8782496249999951 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf581 2.31895830908 0 77.97450015 1.1632497750000041 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf582 4.36510071125 0 78.1205004 0.9442494000000039 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf583 2.13793288512 0 77.946498825 1.2052517624999908 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf584 3.58371362677 0 77.994501075 1.1332483874999966 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf585 2.7974298081 0 77.668499975 1.6222500374999953 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf586 2.86533054339 0 78.151999525 0.8970007125000095 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf587 2.26300649679 0 77.1840002 2.3489997000000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf588 3.74288911287 0 78.276000225 0.7109996624999937 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf589 2.27126418908 0 77.5375003 1.8187495499999926 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf590 3.36364124845 0 78.01550005 1.101749925 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf591 3.76708619178 0 77.873500225 1.3147496624999988 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf592 3.52603923588 0 78.25900115 0.7364982749999953 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf593 1.63231514248 0 78.080002 1.0049970000000101 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf594 2.11514698074 0 78.396999575 0.5295006375000071 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf595 2.1510303661 0 78.34750035 0.6037494749999937 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf596 1.88123041257 0 78.4820001 0.6679999000000066 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf597 2.12995187868 0 78.2914997 0.6877504499999958 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf598 2.31935108241 0 78.26050055 0.7342491749999951 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf599 2.20258261036 0 78.264000675 0.7289989874999918 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf600 1.60552156231 0 78.020004 1.0949939999999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf601 2.3901980036 0 78.20850115 0.8122482749999946 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf602 2.50154135982 0 78.21950075 0.7957488750000081 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf603 1.64456746106 0 78.020004 1.0949939999999998 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf604 2.29217450437 0 78.196500225 0.8302496625000089 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf605 1.92405135546 0 78.347999775 0.6030003374999922 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf606 2.87133870651 0 77.5960001 1.7309998500000034 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf607 2.64377420873 0 78.217499125 0.798751312499995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf608 2.66590618726 0 77.57399955 1.7640006750000055 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf609 2.98396322778 0 77.766999775 1.4745003374999968 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf610 2.34818605648 0 77.59050045 1.73924932500001 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf611 3.72963656481 0 77.524999575 1.837500637500007 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf612 2.82425847867 0 77.514999775 1.8525003374999898 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf613 3.48520639466 0 78.08999955 0.9900006749999974 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf614 2.5234379499 0 77.820500025 1.3942499624999982 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf615 2.16275109817 0 77.4805004 1.9042494000000048 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf616 2.64786391927 0 77.5724995 1.7662507499999904 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf617 3.44574192026 0 78.094999875 0.9825001875000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf618 3.18710451669 0 77.59249975 1.7362503749999973 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf619 2.6708458791 0 78.0805004 1.004249399999992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf620 2.68156783551 0 77.567500475 1.7737492874999958 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf621 2.23420690358 0 78.265499825 0.7267502624999906 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf622 3.59736474838 0 77.67899935 1.606500975000003 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf623 2.44242238448 0 77.613500425 1.704749362500003 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf624 3.16135199481 0 77.686498925 1.5952516125000074 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf625 2.6038089013 0 78.0219999 1.092000150000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf626 2.07523892514 0 77.44449965 1.958250525000004 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf627 2.99538587737 0 77.8749992 1.3125011999999927 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf628 2.76271987895 0 77.775000225 1.4624996625000009 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf629 3.36322818328 0 77.66750085 1.6237487250000058 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf630 2.21882120614 0 78.028000175 1.0829997374999962 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf631 2.98884796399 0 77.430999175 1.978501237500005 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf632 2.98884796399 0 77.5090001 1.8614998500000084 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf633 2.24980746522 0 77.74700045 1.5044993249999976 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf634 2.35829610893 0 77.76049915 1.4842512749999983 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf635 2.82425847867 0 77.51750035 1.848749474999991 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf636 3.58371362677 0 77.9695004 1.1707493999999983 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf637 2.9094681628 0 77.549500075 1.8007498875000039 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf638 2.12995187868 0 78.2985002 0.6772496999999902 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf639 3.16500541712 0 77.782499525 1.4512507124999914 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf640 1.60552156231 0 77.68 1.6049999999999898 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 35 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 35 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf641 2.70433982991 0 77.605499025 1.7167514624999995 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf642 2.56840161772 0 77.600499875 1.7242501875000045 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf643 2.37362548074 0 77.542498575 1.8112521375000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf644 2.74609889077 0 77.627999375 1.6830009374999975 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf645 3.24180661389 0 78.065500075 1.0267498874999959 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf646 3.77591555364 0 77.65950015 1.6357497750000007 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf647 3.542000097 0 78.046499275 1.0552510874999967 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf648 3.59736474838 0 77.720499775 1.54425033750001 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf649 2.9094681628 0 77.516499875 1.8502501875000092 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf650 2.52903050801 0 77.679000875 1.6064986875000002 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf651 3.35910310464 0 77.7865001 1.4452498500000033 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf652 2.29428673152 0 77.814499675 1.40325048750001 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf653 3.4918718623 0 77.722500225 1.5412496624999932 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf654 3.49811600913 0 77.728000325 1.5329995125000053 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf655 3.72963656481 0 77.623000775 1.6904988375000087 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf656 2.16634367244 0 78.264499825 0.7282502624999978 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf657 3.53240628779 0 77.675500825 1.6117487624999995 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf658 3.18710451669 0 77.778000425 1.4579993624999972 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf659 2.83830787766 0 77.499500275 1.875749587499996 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf660 2.5234379499 0 77.6419991 1.66200134999999 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf661 3.66721064524 0 77.55650095 1.7902485749999997 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf662 3.4918718623 0 77.55699955 1.7895006749999993 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf663 4.71776016882 0 78.075000175 1.0124997375000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf664 4.32443501431 0 78.085500375 0.9967494375000072 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf665 2.74472283119 0 77.926499925 1.2352501124999975 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf666 3.80371000503 0 78.209000525 0.81149921250001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf667 3.50662601431 0 77.9505009 1.1992486500000084 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf668 4.44155567719 0 78.136000275 0.920999587499999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf669 4.28586099646 0 78.069500375 1.020749437499994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf670 2.76355621145 0 76.918499275 2.747251087499997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf671 3.96334183535 0 77.812000025 1.4069999624999951 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf672 2.07823096988 0 77.849499275 1.3507510875000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf673 2.2071979046 0 77.255500425 2.241749362500009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf674 3.91630172491 0 78.20250025 0.8212496250000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf675 2.37877965366 0 77.886000625 1.2959990625000088 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf676 3.48520639466 0 77.965499875 1.1767501874999908 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf677 3.39365096575 0 78.02050075 1.0942488750000052 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf678 2.7697048227 0 77.036 2.570999999999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf679 2.14799364712 0 77.940999775 1.2135003375000082 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf680 3.1705014109 0 76.910999125 2.7585013124999946 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf681 3.06923723471 0 77.464000675 1.9289989875000089 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf682 2.35323022394 0 77.083500875 2.499748687500002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf683 2.5476157254 0 77.880000375 1.3049994375000082 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf684 2.23238575577 0 77.9034998 1.2697502999999912 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf685 4.89920842557 0 77.981501175 1.152748237499992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf686 3.18821757541 0 77.998500525 1.1272492125000042 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf687 4.82158183139 0 78.083500475 0.9997492875000091 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf688 1.70550344452 0 77.68 1.6049999999999898 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf689 3.6515641406 0 77.9414998 1.2127502999999962 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf690 1.99715145162 0 78.178000275 0.8579995874999966 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf691 3.12527637116 0 77.0050001 2.6174998499999944 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf692 3.74288911287 0 78.258 0.7380000000000067 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf693 2.13793288512 0 78.1519997 0.8970004499999931 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf694 4.10841193347 0 78.2274998 0.7837502999999941 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf695 2.05715694099 0 77.563999325 1.7790010125000038 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf696 3.23567852554 0 77.442000425 1.961999362499995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf697 2.38209008774 0 77.400999325 2.023501012499999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf698 2.3843713611 0 77.936499725 1.2202504124999933 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf699 4.21982434853 0 78.240000275 0.7649995875000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf700 4.00390160999 0 78.114000325 0.9539995124999905 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf701 2.44133377904 0 76.88649945 2.7952508249999966 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf702 3.52603923588 0 78.24050005 0.7642499250000085 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf703 2.0579298361 0 77.67449925 1.613251125000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf704 3.05077488617 0 77.491999575 1.8870006375000088 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf705 3.23338646124 0 77.2919998 2.187000300000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf706 3.96334183535 0 77.813999325 1.4040010125000038 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf707 2.8298030352 0 77.9330004 1.225499400000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf708 4.60279195815 0 78.125999675 0.9360004874999959 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf709 3.22311222914 0 77.320499575 2.1442506375000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf710 4.35330525145 0 78.2800002 0.7049996999999948 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf711 3.31800009086 0 77.253500775 2.2447488374999907 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf712 3.1270605866 0 78.171000475 0.8684992875000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf713 2.75882367755 0 77.609999475 1.7100007875000074 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf714 4.69511411122 0 77.9954996 1.1317505999999966 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf715 3.53377364881 0 77.552999475 1.7955007875000106 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf716 2.28357392256 0 77.6785006 1.60724909999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf717 2.24630140357 0 77.262000425 2.231999362500005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf718 4.06752993595 0 78.1885008 0.8422488000000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf719 2.35323022394 0 76.881001075 2.8034983874999995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf720 2.28262228473 0 77.0555006 2.541749099999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf721 2.94386966745 0 78.093498825 0.9847517625000037 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf722 2.38769738778 0 77.418500475 1.9972492874999972 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf723 2.29948783073 0 77.775 1.4624999999999915 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf724 2.88586161931 0 78.1530001 0.8954998500000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf725 4.27315546194 0 78.2205006 0.7942491000000089 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf726 3.64524581982 0 78.02450025 1.088249624999996 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf727 3.87145936777 0 78.22300045 0.790499324999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf728 1.92027397741 0 77.3279997 2.13300044999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf729 3.41779112394 0 78.0579999 1.038000150000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf730 3.6515641406 0 77.90049995 1.2742500750000048 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf731 1.79680900793 0 78.42599915 0.48600127500000667 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf732 1.76031414454 0 78.606999125 0.5430008749999985 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf733 1.77170177496 0 78.338000125 0.6179998125000097 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf734 2.64377420873 0 78.252999875 0.7455001874999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf735 2.07194193579 0 78.442999125 0.4605013124999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf736 2.48881101405 0 78.138501425 0.9172478624999982 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf737 2.5635932838 0 78.22050075 0.794248875000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf738 2.36440403604 0 78.284500375 0.6982494375000101 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf739 1.99061917958 0 78.33349955 0.6247506750000014 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf740 2.27428194775 0 78.27450025 0.713249624999996 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf741 1.95369833697 0 78.362500225 0.5812496624999923 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf742 1.99206710006 0 78.22650005 0.7852499250000022 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf743 2.55141313335 0 78.265999925 0.726000112499996 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf744 2.24152130544 0 78.300000325 0.6749995125000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf745 1.88123041257 0 78.385500125 0.5467498125000105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf746 2.02237001209 0 78.501499625 0.6485003750000061 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf747 1.92405135546 0 78.301500625 0.6727490624999959 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf748 2.24152130544 0 78.350499675 0.599250487500008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf749 2.24152130544 0 78.238999175 0.7665012374999947 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf750 2.67240958712 0 78.2230003 0.7904995500000069 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf751 2.24152130544 0 78.163999475 0.8790007875000043 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf752 1.88771378107 0 78.321499625 0.6427505624999981 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf753 2.44133377904 0 78.307999575 0.663000637500005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf754 1.76439678846 0 78.19899945 0.8265008249999966 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf755 2.24152130544 0 78.233000325 0.7754995124999908 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf756 2.92188437688 0 77.6155006 1.7017490999999936 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf757 2.35829610893 0 77.784500175 1.4482497374999923 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf758 2.67110637001 0 77.565000675 1.7774989875000102 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf759 2.54477507927 0 77.81399975 1.4040003750000096 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf760 2.52903050801 0 77.79449925 1.4332511249999982 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf761 3.35910310464 0 77.543000525 1.8104992125000052 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf762 2.98396322778 0 77.81699965 1.3995005250000006 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf763 2.27522664257 0 77.9110006 1.2584991000000088 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf764 3.46799468161 0 77.600999725 1.7235004125000088 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf765 2.72858886384 0 77.59399985 1.7340002249999955 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf766 3.59736474838 0 77.69649975 1.5802503749999985 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf767 1.81876478645 0 77.32 2.1450000000000102 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf768 3.22083792723 0 77.53899915 1.8165012750000074 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf769 3.45051734728 0 77.7440003 1.5089995500000057 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf770 2.86683022571 0 77.706999775 1.5645003375000002 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf771 2.48136969546 0 77.583999875 1.7490001874999948 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf772 3.02516396272 0 77.7714992 1.4677512000000092 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf773 3.24411063565 0 78.043999525 1.0590007124999943 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf774 2.98396322778 0 77.6845 1.5982500000000002 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf775 2.54477507927 0 77.590999775 1.7385003374999997 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf776 2.46129952706 0 77.6789995 1.606500749999995 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf777 3.01849555693 0 77.45599925 1.9410011249999926 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf778 3.89181483916 0 77.65 1.6499999999999915 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf779 2.98884796399 0 77.515500675 1.8517489875000024 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf780 3.89181483916 0 77.39899925 2.026501124999996 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf781 3.49454519355 0 77.64049945 1.6642508250000105 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf782 3.27671427503 0 77.992000675 1.1369989875000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf783 2.98884796399 0 77.547499875 1.8037501875000004 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf784 2.03862830664 0 78.50150015 0.6484998500000018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf785 2.06413396769 0 77.8630003 1.330499550000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf786 3.24603315703 0 77.6249994 1.6875009000000105 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf787 2.87133870651 0 78.0169998 1.0995003000000096 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf788 2.07523892514 0 77.405499875 2.0167501874999942 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf789 2.95307508795 0 77.4635008 1.9297487999999916 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf790 2.72858886384 0 77.4759993 1.911001050000003 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf791 2.89409550228 0 77.545499775 1.8067503375000058 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf792 3.82389130321 0 77.5959998 1.731000299999998 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf793 3.39701846598 0 77.605000525 1.7174992125000088 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf794 3.25529306887 0 77.57349925 1.7647511250000036 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf795 3.63363848948 0 77.6295009 1.6807486500000053 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf796 3.4918718623 0 77.5595004 1.7857493999999932 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf797 3.11638574781 0 77.565498625 1.7767520624999946 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf798 3.19453962237 0 77.5310004 1.8284994000000054 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf799 2.83566297806 0 78.08499915 0.9975012749999976 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf800 3.52603923588 0 77.94200035 1.211999474999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf801 3.48520639466 0 77.989000675 1.1414989875000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf802 3.20613284165 0 78.133000875 0.92549868750001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf803 2.85338933385 0 77.49199965 1.8870005250000048 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf804 3.30478754695 0 77.596500125 1.7302498124999914 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf805 1.92906574394 0 77.572499625 1.766250562499991 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf806 1.8683963672 0 78.3395001 0.6157498500000074 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf807 3.58371362677 0 78.007500225 1.1137496624999983 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf808 2.6336049908 0 77.66750125 1.6237481249999988 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf809 3.77591555364 0 77.58999985 1.7400002250000028 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf810 3.07993730408 0 77.98450015 1.1482497749999965 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf811 1.93165087062 0 77.84100025 1.36349962500001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf812 2.64914454991 0 78.053000125 1.0454998125000046 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf813 3.01849555693 0 77.412500075 2.0062498875000045 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf814 1.9769681931 0 77.975500125 1.1617498125000054 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf815 3.42248933877 0 78.078999 1.006501500000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf816 2.63411159335 0 77.61799985 1.6980002249999941 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf817 3.36364124845 0 78.058499125 1.037251312500004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf818 2.43612195203 0 78.1924998 0.8362503000000103 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf819 3.71194409149 0 77.449999775 1.9500003375000077 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf820 3.28812300212 0 77.5774994 1.7587509000000097 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf821 4.13446115525 0 77.995500225 1.131749662499999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf822 4.43580061952 0 78.0010006 1.1234991000000036 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf823 4.49698282055 0 78.079500425 1.0057493624999907 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf824 3.27045362948 0 77.385499225 2.0467511624999943 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf825 2.41549528692 0 77.288999575 2.191500637499992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf826 3.91238549312 0 77.974000825 1.1639987624999932 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf827 3.23338646124 0 77.27799945 2.2080008250000063 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf828 2.92157268012 0 77.929500675 1.2307489875000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf829 3.04365492811 0 77.0490005 2.551499249999992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf830 3.6350853616 0 77.55699955 1.7895006749999993 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf831 3.14321067355 0 77.30549915 2.1667512749999958 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf832 2.81352330076 0 78.131999375 0.9270009374999901 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf833 2.51025423632 0 77.7514998 1.4977502999999928 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf834 1.98484848485 0 76.900002 2.774996999999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf835 5.01218173084 0 78.1185006 0.9472490999999934 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf836 2.81757564846 0 77.3214996 2.1427506000000065 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf837 3.31800009086 0 77.314999975 2.1525000374999905 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf838 3.50662601431 0 78.001000625 1.1234990624999952 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf839 2.37444864695 0 77.68899985 1.5915002249999972 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf840 4.21982434853 0 78.1680008 0.8729987999999977 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf841 2.53629102275 0 77.1980002 2.3279997000000066 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf842 1.67627481734 0 77.219994 2.2950090000000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf843 4.60279195815 0 78.109000275 0.9614995875000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf844 3.53377364881 0 77.57350025 1.7647496250000074 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf845 3.91798251104 0 78.2325003 0.7762495500000028 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf846 4.36510071125 0 78.20000035 0.8249994750000056 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf847 3.1270605866 0 78.18399965 0.8490005249999939 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf848 2.58610616022 0 78.1775008 0.8587487999999937 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf849 2.07823096988 0 77.853500125 1.3447498125000052 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf850 3.96334183535 0 77.8455002 1.3567496999999946 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf851 4.44155567719 0 78.2215 0.792749999999991 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf852 2.22422719659 0 77.0174992 2.5987511999999953 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf853 3.1705014109 0 76.985000175 2.647499737500006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf854 3.69591437391 0 77.835499375 1.3717509375000034 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf855 3.1705014109 0 76.8904994 2.789250900000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf856 5.09612029865 0 77.963500725 1.1797489124999956 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf857 4.87565925629 0 78.03849965 1.0672505249999915 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf858 3.6515641406 0 77.90149955 1.2727506750000046 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf859 4.89920842557 0 78.0020002 1.1219997000000035 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf860 1.91009546227 0 76.919998 2.74500299999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf861 4.66234332773 0 77.9634993 1.1797510500000072 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf862 3.58371362677 0 78.027999725 1.0830004124999988 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf863 2.98396322778 0 77.876999875 1.3095001875000065 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf864 3.92134841516 0 78.1380001 0.9179998500000011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf865 2.18431284582 0 78.071999925 1.0170001124999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf866 4.69511411122 0 78.008 1.1130000000000067 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf867 2.76355621145 0 76.909499575 2.760750637500003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf868 3.27045362948 0 77.92649975 1.2352503749999926 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf869 4.98028095379 0 78.05599975 1.041000375000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf870 3.96334183535 0 77.7624998 1.4812502999999992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf871 3.80371000503 0 78.204000075 0.8189998875000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf872 2.76355621145 0 77.818000175 1.3979997375000082 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf873 2.18431284582 0 77.952000375 1.1969994375000041 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf874 3.6583930271 0 77.95250055 1.1962491750000055 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf875 2.35323022394 0 76.93950005 2.71574992499999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf876 3.35622156523 0 77.8575001 1.3387498500000063 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf877 2.31895830908 0 77.948000125 1.2029998125000105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf878 4.00390160999 0 78.0455004 1.0567494000000082 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf879 3.05997821259 0 77.5605001 1.784249850000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf880 4.77617552809 0 78.16100065 0.8834990249999919 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf881 3.18821757541 0 77.956999575 1.1895006375000037 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf882 2.33517186251 0 77.0050003 2.617499549999991 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf883 2.45556178201 0 77.736999325 1.519501012500001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf884 3.44357563851 0 77.866499875 1.3252501874999965 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf885 3.40081490105 0 77.980000275 1.1549995875000079 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf886 4.28586099646 0 78.068000025 1.0229999624999948 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf887 2.6666849235 0 77.8174999 1.398750149999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf888 2.40933283482 0 78.249000425 0.7514993624999917 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf889 2.12995187868 0 78.315999275 0.6510010875000063 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf890 1.99061917958 0 78.5549996 0.5950003999999979 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf891 2.49016877877 0 78.22750045 0.7837493250000094 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf892 2.36440403604 0 78.225500475 0.786749287499994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf893 2.0051937949 0 78.266501025 0.7252484625000051 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf894 2.23420690358 0 78.3095012 0.6607482000000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf895 1.99206710006 0 78.292000425 0.6869993625000035 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf896 1.88771378107 0 78.3400007 0.6149989499999933 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf897 2.43612195203 0 78.225499325 0.7867510124999981 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf898 1.540499209 0 78.159996 0.88500599999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf899 2.10636242153 0 78.417000075 0.49949988749999363 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf900 2.16360539179 0 78.338500575 0.6172491375000035 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf901 1.94813942115 0 78.329001025 0.6314984625000051 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf902 2.16634367244 0 78.270499625 0.719250562500001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf903 3.70240971999 0 77.72750015 1.533749775000004 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf904 2.46129952706 0 77.588999775 1.7415003374999927 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf905 2.67110637001 0 77.60749935 1.7137509750000035 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf906 3.49454519355 0 77.474000025 1.9139999625000073 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf907 3.16720152652 0 77.601500275 1.7227495874999903 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf908 2.57467948341 0 77.621499675 1.6927504875000068 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf909 2.25834132162 0 77.974500125 1.1632498124999913 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf910 3.63363848948 0 77.506999425 1.8645008624999946 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf911 3.84266973008 0 77.47100045 1.918499324999992 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf912 3.89181483916 0 77.640499875 1.664250187499995 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf913 2.1510303661 0 78.366999625 0.5745005624999919 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf914 3.59736474838 0 77.5859996 1.7460006000000092 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf915 1.85888516523 0 77.279999 2.2050014999999945 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf916 3.25529306887 0 77.648500275 1.6522495874999947 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf917 2.70433982991 0 77.718998725 1.5465019124999912 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf918 2.80373024506 0 78.0499997 1.0500004499999989 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf919 3.45051734728 0 77.5799984 1.7550024000000093 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf920 2.98884796399 0 77.48249965 1.901250525000009 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf921 1.92906574394 0 77.702499575 1.5712506374999933 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf922 3.18710451669 0 77.611000225 1.708499662500003 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf923 3.1306351361 0 78.080499325 1.0042510124999922 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf924 2.20630884648 0 77.962500925 1.1812486124999992 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf925 3.84266973008 0 77.69300005 1.5854999250000077 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf926 2.24980746522 0 77.5304995 1.8292507499999928 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf927 2.84007388684 0 77.789499625 1.4407505624999928 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf928 1.85888516523 0 77.300003 2.1749954999999943 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf929 3.58371362677 0 77.96999965 1.1700005249999919 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf930 3.4918718623 0 77.5699999 1.7700001500000013 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf931 1.96561700982 0 77.9269995 1.2345007500000094 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf932 3.11638574781 0 77.448000375 1.9529994374999902 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf933 2.27522664257 0 77.884999125 1.2975013125000103 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf934 2.48136969546 0 77.53750075 1.81874887499999 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf935 1.81635233161 0 78.004500925 1.1182486124999969 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf936 2.45556178201 0 77.64299995 1.6605000749999945 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf937 3.59076159103 0 77.530500475 1.829249287500005 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf938 3.02516396272 0 77.670999775 1.6185003375000022 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf939 3.542000097 0 78.062499825 1.031250262499995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf940 1.89424199196 0 78.40299995 0.5205000750000082 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf941 1.82360897249 0 77.93999975 1.2150003750000025 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf942 2.34818605648 0 77.39100015 2.0384997750000053 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf943 3.45487014523 0 77.540999925 1.8135001125000088 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf944 2.11694541871 0 78.096500175 0.9802497374999959 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf945 2.23238575577 0 77.707000075 1.5644998875000056 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf946 2.20915635682 0 78.0034997 1.1197504499999908 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf947 3.13457659127 0 77.79799975 1.4280003749999963 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf948 3.26928242088 0 77.75549965 1.4917505249999934 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf949 3.48564996739 0 78.11399995 0.9540000750000104 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf950 4.98028095379 0 78.00299975 1.1205003749999989 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf951 2.44133377904 0 76.8854997 2.7967504500000047 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf952 4.59661176964 0 78.033999425 1.074000862499993 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf953 2.38209008774 0 77.401500575 2.0227491375 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf954 2.94386966745 0 78.122500675 0.9412489875000034 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf955 2.8077544116 0 77.952499975 1.1962500375000076 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf956 4.89920842557 0 78.042499675 1.061250487499997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf957 2.53629102275 0 76.9699994 2.670000899999991 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf958 4.79205397957 0 77.998000425 1.1279993624999989 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf959 2.41762758621 0 78.1275004 0.9337493999999964 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf960 3.13565325662 0 78.0470011 1.0544983499999958 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf961 3.27045362948 0 77.32399985 2.1390002249999895 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf962 3.4869813633 0 78.087499975 0.9937500374999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf963 1.88381840849 0 78.0530005 1.045499250000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf964 4.48813937768 0 78.20750085 0.8137487249999964 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf965 3.27045362948 0 77.946500575 1.2052491374999974 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf966 3.06923723471 0 77.462500025 1.9312499625000044 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf967 2.88343094032 0 77.0365002 2.570249699999991 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf968 3.05009536007 0 78.048499675 1.0522504874999967 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf969 2.55260214603 0 77.1134998 2.4547503000000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf970 2.84775205688 0 77.982000625 1.1519990625000034 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf971 2.68682935802 0 78.218001525 0.797997712499992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf972 3.1705014109 0 76.964999425 2.677500862499997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf973 2.73868451644 0 76.981499525 2.6527507124999943 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf974 4.60279195815 0 78.200501275 0.8242480875000098 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf975 2.6666849235 0 77.78500065 1.447499024999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf976 3.76294547016 0 77.81899995 1.3965000749999916 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf977 4.69591915468 0 78.12349835 0.9397524749999917 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf978 4.90008498647 0 78.1084997 0.9622504500000062 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf979 4.00390160999 0 77.994000575 1.1339991374999983 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf980 3.58371362677 0 78.021500225 1.0927496625000046 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf981 2.97909443198 0 78.1365005 0.9202492500000048 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf982 2.51025423632 0 77.759499975 1.4857500375000043 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf983 2.88343094032 0 77.00999965 2.610000525000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf984 3.78635617385 0 78.281000475 0.7034992875000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf985 3.91630172491 0 78.2364997 0.770250450000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf986 2.19147904018 0 77.157999825 2.388000262499993 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf987 4.50808625337 0 78.13050045 0.9292493250000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf988 2.02237001209 0 78.5584997 0.5915003000000013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf989 3.41779112394 0 77.9699995 1.1700007499999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf990 4.13446115525 0 77.980500875 1.1542486874999938 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf991 4.87565925629 0 78.00549905 1.116751425000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf992 2.59985404578 0 77.857500025 1.3387499625000103 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf993 3.89292123452 0 77.8730001 1.315499850000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf994 2.09813263107 0 77.88900045 1.2914993250000038 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf995 3.58559037722 0 77.913500175 1.2547497375000063 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf996 2.0941218638 0 77.07050035 2.519249474999995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf997 3.6515641406 0 77.900000975 1.2749985375000037 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf998 2.55165084726 0 78.32750035 0.633749475000009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf999 2.3341767432 0 78.415499575 0.5017506375000025 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1000 3.52603923588 0 77.950500225 1.1992496625000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1001 3.89292123452 0 77.8040005 1.4189992499999988 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1002 4.21982434853 0 78.181999975 0.8520000375000052 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1003 2.27220637801 0 77.9985003 1.1272495499999948 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1004 4.12325686433 0 78.208999675 0.8115004874999983 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1005 3.24718776399 0 78.17100015 0.8684997750000036 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1006 4.36510071125 0 78.194000825 0.8339987624999949 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1007 2.23202188242 0 77.88950165 1.2907475250000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1008 3.6350853616 0 77.5305006 1.8292491000000055 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1009 3.1705014109 0 76.911999825 2.757000262500007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1010 2.63995156218 0 78.09099915 0.9885012749999973 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1011 2.26900612048 0 77.43999935 1.9650009750000095 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1012 2.95817862688 0 78.152999375 0.8955009375000103 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1013 3.76294547016 0 77.7829996 1.4505006000000051 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1014 3.1306351361 0 78.0964996 0.980250599999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1015 3.17822794726 0 78.00099945 1.1235008250000078 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1016 2.32921384149 0 78.343000275 0.6104995875000085 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1017 4.27983318032 0 78.19599975 0.8310003750000021 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1018 3.20200949945 0 78.057999375 1.0380009375000085 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1019 5.09612029865 0 77.9790006 1.1564990999999907 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1020 3.21441146697 0 78.006501025 1.1152484624999914 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1021 2.83419571826 0 77.893000175 1.285499737500004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1022 2.92157268012 0 77.982500775 1.1512488374999919 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1023 3.70341103092 0 77.94600085 1.2059987249999935 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1024 4.43580061952 0 78.02299955 1.0905006750000084 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1025 3.21441146697 0 77.967000425 1.1744993625000077 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1026 3.43063493306 0 78.3300017 0.6299974500000047 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1027 3.05077488617 0 78.101000325 0.9734995124999983 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1028 2.6974145891 0 77.746499625 1.5052505625000023 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1029 3.75675142744 0 78.24849945 0.7522508250000044 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1030 3.13565325662 0 77.45699975 1.9395003750000086 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1031 4.76952130091 0 77.99300005 1.1354999249999906 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1032 1.63231514248 0 78.219994 0.7950090000000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1033 2.16360539179 0 78.309999475 0.6600007875000031 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1034 2.14664674472 0 78.349999775 0.6000003374999991 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1035 2.03862830664 0 78.498499375 0.6515006250000056 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1036 1.92297058901 0 78.362000525 0.5819992125000013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1037 2.14833063686 0 78.43400025 0.4739996250000047 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1038 1.88123041257 0 78.516999425 0.6330005750000055 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1039 1.94813942115 0 78.2904999 0.6892501499999995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1040 2.48926343774 0 78.2200001 0.7949998500000106 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1041 2.18361621336 0 78.249499325 0.7507510124999968 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1042 2.37280288508 0 78.307500125 0.6637498124999937 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1043 1.89424199196 0 78.383499925 0.549750112500007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1044 2.59394423726 0 78.23650015 0.7702497750000035 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1045 2.29217450437 0 78.15900005 0.8864999249999954 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1046 1.88123041257 0 78.355000775 0.5924988375000098 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1047 2.23420690358 0 78.266000375 0.7259994374999934 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1048 2.20915635682 0 78.045500025 1.0567499625000067 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1049 2.57467948341 0 77.5895 1.7407499999999985 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1050 3.72963656481 0 77.702 1.5720000000000027 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1051 2.87133870651 0 77.7084998 1.5622503000000023 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1052 3.36322818328 0 77.545000475 1.8074992875000078 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1053 2.27522664257 0 77.99050005 1.1392499250000085 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1054 3.45051734728 0 77.57399995 1.7640000749999984 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1055 2.00445999726 0 77.695999425 1.5810008625000052 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1056 3.16135199481 0 77.4580006 1.9379990999999919 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1057 2.72858886384 0 77.643 1.660499999999999 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1058 2.43050085973 0 78.240499875 0.7642501875000036 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1059 2.97779876864 0 77.5065006 1.8652491000000069 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1060 2.44242238448 0 77.486000225 1.895999662500003 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1061 2.95498685009 0 77.769999475 1.4700007874999912 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1062 2.99538587737 0 77.6839996 1.5990005999999894 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1063 2.70007394171 0 78.2305 0.7792499999999905 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1064 3.11638574781 0 77.429000275 1.9814995874999894 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1065 2.73376380311 0 77.986499175 1.1452512374999912 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1066 2.24980746522 0 77.919499525 1.2457507124999907 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1067 2.54477507927 0 77.650499175 1.6492512375000103 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1068 3.59736474838 0 77.56399995 1.7790000750000061 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1069 1.68919856598 0 77.599998 1.725003000000001 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1070 2.27220637801 0 77.841999425 1.362000862500004 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1071 1.96561700982 0 77.838000275 1.3679995875000017 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1072 2.65840178604 0 77.9914997 1.1377504499999915 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1073 2.67110637001 0 77.647498625 1.6537520625000042 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1074 3.77591555364 0 77.45899885 1.9365017249999994 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1075 2.12995187868 0 78.3009999 0.6735001500000095 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1076 3.66721064524 0 77.549000725 1.8014989125000014 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1077 3.11638574781 0 77.498499925 1.8772501124999934 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1078 3.16135199481 0 77.5295002 1.8307496999999984 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1079 2.5234379499 0 77.82349965 1.3897505249999966 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1080 2.71748837209 0 77.85500125 1.3424981249999988 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1081 3.4918718623 0 77.52800005 1.8329999249999958 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1082 3.84266973008 0 77.68799935 1.5930009750000025 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1083 2.89409550228 0 77.606999775 1.7145003374999916 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1084 2.60281905984 0 77.470499775 1.91925033750001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1085 2.76271987895 0 77.39050045 2.039249324999993 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1086 2.63512538337 0 77.775500225 1.4617496624999973 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1087 3.44314270897 0 77.59700025 1.7294996250000096 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1088 3.59076159103 0 77.57299945 1.7655008250000037 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1089 3.59736474838 0 77.616498975 1.7002515375000016 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1090 3.84266973008 0 77.458501075 1.9372483874999986 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1091 2.48136969546 0 77.501500725 1.8727489125000005 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1092 3.44574192026 0 78.093999125 0.9840013125000056 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1093 3.39701846598 0 77.636000025 1.670999962499998 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1094 2.84007388684 0 77.776000025 1.4609999624999972 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1095 2.41443054714 0 77.6015001 1.7227498500000067 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1096 3.18710451669 0 77.5355001 1.8217498500000104 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1097 2.21882120614 0 78.0529997 1.0455004499999987 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1098 1.62843286633 0 78.160004 0.884993999999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1099 2.21882120614 0 78.022499125 1.091251312500006 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1100 3.24180661389 0 78.083999625 0.9990005624999938 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1101 2.72018747284 0 77.89399965 1.2840005250000033 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1102 2.99538587737 0 77.865000125 1.3274998125000081 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1103 3.72963656481 0 77.505000175 1.8674997374999904 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1104 2.64786391927 0 77.71150035 1.5577494750000085 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1105 2.57467948341 0 77.849499925 1.3507501124999948 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1106 3.87145936777 0 78.20300085 0.8204987250000073 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1107 2.65840178604 0 77.654498675 1.6432519875000011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1108 3.70341103092 0 77.9179999 1.2480001500000029 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1109 2.58903988183 0 78.103500525 0.9697492124999982 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1110 2.95307508795 0 77.5050006 1.8674990999999963 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1111 2.59985404578 0 77.8775005 1.3087492500000053 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1112 3.11142905832 0 78.110500675 0.9592489875000041 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1113 2.6262810908 0 77.9474991 1.2037513499999974 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1114 3.57575930671 0 77.5924996 1.7362506000000053 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1115 3.53696827831 0 77.8979997 1.2780004500000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1116 2.76355621145 0 77.766500125 1.4752498125000102 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1117 3.32808014218 0 77.96799985 1.1730002250000027 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1118 1.8517211653 0 77.825500325 1.3867495125000104 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1119 3.05997821259 0 77.4799999 1.9050001500000064 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1120 2.69608687627 0 77.7369999 1.519500149999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1121 2.73868451644 0 76.991999625 2.637000562499992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1122 3.15516049596 0 78.0829996 1.0005006000000094 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1123 4.66234332773 0 78.014000425 1.1039993624999909 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1124 4.49698282055 0 78.037999725 1.0680004124999911 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1125 3.26850208106 0 77.27399955 2.214000675000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1126 2.63995156218 0 78.168500525 0.8722492125000016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1127 2.76216260511 0 77.72600055 1.5359991750000077 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1128 4.2459932947 0 78.1615003 0.8827495499999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1129 4.71776016882 0 78.141999875 0.9120001875000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1130 2.96779546224 0 77.037499625 2.568750562500007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1131 2.13793288512 0 78.1220006 0.9419990999999897 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1132 2.33816231791 0 77.426000075 1.9859998874999931 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1133 4.13446115525 0 78.046999775 1.054500337499995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1134 2.37877965366 0 77.9255001 1.2367498500000096 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1135 4.90008498647 0 78.032500275 1.0762495874999942 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1136 3.542000097 0 78.06050045 1.0342493249999904 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1137 3.29564085979 0 78.013500225 1.104749662499998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1138 2.75993575613 0 78.140999775 0.9135003375000039 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1139 2.07194193579 0 78.491999825 0.6580001750000065 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1140 4.67427943592 0 78.136500725 0.9202489124999929 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1141 4.39310849558 0 78.14650025 0.9052496249999962 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1142 2.6666849235 0 77.769999725 1.4700004124999921 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1143 3.12848942641 0 77.2724995 2.2162507500000075 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1144 3.6515641406 0 77.9045 1.2682500000000019 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1145 1.91946646809 0 78.0205012 1.0942482000000027 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1146 2.47800196795 0 77.707 1.5645000000000095 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1147 3.13565325662 0 78.0715006 1.0177491000000103 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1148 2.27220637801 0 77.976999875 1.1595001874999937 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1149 4.50808625337 0 78.2275001 0.7837498499999995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1150 3.3268673034 0 78.090999825 0.9885002625000041 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1151 3.27045362948 0 77.920499375 1.2442509374999915 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1152 3.41779112394 0 77.97850025 1.1572496250000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1153 3.4088575296 0 77.643500325 1.6597495124999924 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1154 3.84374835529 0 77.912500475 1.2562492874999975 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1155 2.44133377904 0 77.68549885 1.5967517249999972 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1156 4.50808625337 0 78.19299935 0.8355009750000093 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1157 3.91238549312 0 77.884499575 1.29825063749999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1158 4.27983318032 0 78.21400065 0.803999024999996 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1159 4.48813937768 0 78.13750035 0.9187494750000056 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1160 3.23567852554 0 77.40799915 2.0130012750000077 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1161 3.19976487082 0 78.14350035 0.9097494750000052 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1162 4.59198750865 0 78.08450075 0.9982488749999945 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1163 3.05997821259 0 78.078499975 1.0072500375000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1164 2.91566305143 0 77.81099945 1.4085008250000044 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1165 4.37206912378 0 78.126499725 0.9352504124999967 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1166 2.55141313335 0 78.257000525 0.7394992125000073 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1167 2.6038089013 0 77.8709997 1.318500450000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1168 3.20989056068 0 78.087999475 0.9930007874999944 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1169 4.69591915468 0 78.073000175 1.0154997374999937 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1170 4.07904045576 0 78.0680005 1.0229992500000051 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1171 3.70341103092 0 77.97799995 1.158000075000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1172 4.04291144613 0 78.1914999 0.8377501500000051 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1173 2.73868451644 0 77.900999825 1.2735002625000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1174 2.81352330076 0 78.18849925 0.84225112499999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1175 2.72831704128 0 77.1470015 2.4044977499999973 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1176 2.02536121357 0 77.484 1.899000000000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1177 2.89378970586 0 78.066000325 1.0259995124999932 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1178 4.47713386599 0 78.0374989 1.0687516499999958 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1179 2.28186154535 0 78.30250075 0.6712488750000105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1180 1.59477017142 0 78.099998 0.975003000000001 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1181 1.99206710006 0 78.364499875 0.5782501874999895 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1182 1.79680900793 0 78.4280002 0.4829997000000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1183 2.10636242153 0 78.437001375 0.4694979375000088 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1184 2.3343756992 0 78.258500675 0.7372489875000099 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1185 1.99206710006 0 78.21500055 0.8024991750000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1186 2.48926343774 0 78.25100025 0.7484996249999938 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1187 2.1510303661 0 78.399999275 0.5250010875000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1188 2.29217450437 0 78.2684995 0.7222507499999935 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1189 2.14833063686 0 78.370000125 0.5699998124999937 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1190 2.07194193579 0 78.4640009 0.685999099999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1191 3.59076159103 0 77.510000225 1.8599996625000017 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1192 3.72963656481 0 77.538999975 1.8165000375000062 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1193 3.06545828495 0 78.12249985 0.9412502250000045 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1194 2.25834132162 0 77.95550025 1.191749625 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1195 3.24180661389 0 78.09600075 0.9809988749999974 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1196 3.39238987381 0 78.1539999 0.8940001499999966 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1197 3.70240971999 0 77.582999925 1.7505001125000064 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1198 3.4918718623 0 77.696999525 1.579500712500007 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1199 3.84266973008 0 77.717499325 1.5487510124999915 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1200 2.63411159335 0 77.5909994 1.7385008999999982 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1201 3.89181483916 0 77.60349975 1.7197503750000038 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1202 2.17046411412 0 78.16049905 0.8842514250000022 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1203 2.1510303661 0 78.32750015 0.6337497749999912 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1204 2.87133870651 0 77.3539998 2.0940003000000047 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1205 1.87210017687 0 78.310500475 0.6592492875000033 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1206 2.99538587737 0 77.777999625 1.4580005624999899 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1207 2.91566305143 0 77.823999825 1.389000262499998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1208 2.48136969546 0 77.5454995 1.806750749999992 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1209 2.0051937949 0 78.177000725 0.8594989125000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1210 2.49470538002 0 77.80050115 1.4242482749999965 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1211 3.28812300212 0 77.6694994 1.6207508999999902 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1212 2.64786391927 0 77.750500475 1.4992492875000067 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1213 2.80373024506 0 77.881000475 1.30349928750001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1214 2.07194193579 0 78.442500475 0.46124928749999583 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1215 3.59736474838 0 77.547500075 1.8037498874999969 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1216 3.16500541712 0 77.667500625 1.6237490624999964 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1217 2.24980746522 0 77.682499425 1.6012508624999953 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1218 1.74073411989 0 77.279999 2.2050014999999945 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1219 3.4918718623 0 77.587500025 1.7437499625000044 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1220 3.11142905832 0 78.137999725 0.9180004124999996 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1221 2.03272718838 0 77.937999625 1.218000562499995 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1222 2.95498685009 0 77.814499925 1.4032501124999897 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1223 3.89181483916 0 77.42200035 1.991999474999993 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1224 2.21882120614 0 78.0514997 1.0477504500000094 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1225 2.11107096401 0 77.77950025 1.4557496250000028 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1226 3.30478754695 0 77.557500175 1.788749737499998 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1227 3.16135199481 0 77.4919998 1.8870002999999969 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1228 3.542000097 0 78.091499675 0.9877504875000085 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1229 3.66721064524 0 77.467500125 1.9237498124999988 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1230 2.04029892443 0 77.632001075 1.6769983874999923 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1231 2.76271987895 0 77.370999175 2.0685012375000085 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1232 3.18710451669 0 77.56999975 1.7700003750000093 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1233 3.72963656481 0 77.694500375 1.583249437499994 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1234 2.98884796399 0 77.464500175 1.9282497375000034 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1235 2.30393703271 0 77.696999775 1.5795003375000078 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1236 2.57467948341 0 77.613499225 1.7047511625000027 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1237 2.84007388684 0 77.7164995 1.5502507500000036 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1238 2.1432868672 0 77.980999425 1.1535008625000103 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1239 3.84266973008 0 77.508499775 1.8622503374999937 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1240 2.24980746522 0 77.574000025 1.7639999624999945 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1241 2.41443054714 0 77.596499225 1.7302511624999966 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1242 2.79400512644 0 77.93699985 1.2195002249999902 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1243 3.27045362948 0 77.3099993 2.160001049999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1244 2.45270292962 0 78.304000525 0.6689992124999904 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1245 2.76355621145 0 76.920999525 2.7435007125000013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1246 2.91070503368 0 77.567500075 1.7737498875000028 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1247 4.60279195815 0 78.169499625 0.8707505624999996 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1248 5.01218173084 0 78.1449999 0.9075001499999971 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1249 4.79205397957 0 78.0169998 1.0995003000000096 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1250 3.58371362677 0 78.0214997 1.0927504499999898 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1251 4.27315546194 0 78.194499825 0.8332502625000089 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1252 3.96334183535 0 77.71600015 1.550999775000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1253 2.52809669417 0 77.74900045 1.5014993250000046 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1254 3.23338646124 0 77.338500225 2.1172496624999937 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1255 3.1705014109 0 76.979499875 2.655750187499997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1256 4.60279195815 0 78.067500075 1.0237498875000028 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1257 3.31800009086 0 77.27149965 2.2177505250000067 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1258 4.69511411122 0 78.07099935 1.0185009750000091 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1259 4.13446115525 0 78.039001075 1.0664983874999976 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1260 2.83419571826 0 77.84549965 1.3567505250000096 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1261 2.97909443198 0 78.13100035 0.9284994750000095 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1262 5.09612029865 0 78.0065002 1.1152496999999926 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1263 2.51025423632 0 77.747500525 1.50374921249999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1264 4.44155567719 0 78.142999375 0.9105009374999966 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1265 4.66234332773 0 78.030499625 1.0792505624999933 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1266 4.21982434853 0 78.242999675 0.7605004875000105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1267 2.23202188242 0 77.88099955 1.3035006750000022 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1268 4.89920842557 0 77.916500125 1.2502498125000017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1269 3.93148091658 0 78.236500275 0.7702495875000039 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1270 4.06752993595 0 78.23250025 0.7762496249999984 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1271 4.28586099646 0 78.0454995 1.056750749999992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1272 3.55210680761 0 77.8340006 1.373999100000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1273 3.53377364881 0 77.581000125 1.7534998124999959 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1274 2.55165084726 0 78.29200035 0.6869994750000075 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1275 2.63995156218 0 78.137500525 0.9187492125000105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1276 3.18821757541 0 77.458999075 1.9365013875000088 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1277 3.6515641406 0 77.89600045 1.2809993249999962 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1278 3.99339287342 0 78.204501125 0.8182483125000104 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1279 2.31895830908 0 77.9475006 1.2037491000000031 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1280 2.61175481887 0 78.01199955 1.1070006750000019 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1281 4.36510071125 0 78.160500325 0.8842495124999985 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1282 2.18500992291 0 77.8480004 1.3529993999999945 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1283 4.12325686433 0 78.20700015 0.8144997750000016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1284 3.6350853616 0 77.549999975 1.8000000374999914 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1285 3.40081490105 0 78.0040002 1.1189997000000105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1286 3.9639154741 0 78.268001 0.7229985000000028 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1287 2.66564670957 0 78.252999475 0.7455007875000064 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1288 4.00390160999 0 78.072999525 1.0155007124999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1289 3.01949394303 0 77.361000075 2.0834998874999897 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1290 2.58610616022 0 78.197999625 0.8280005625000086 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1291 3.20613284165 0 78.168999925 0.8715001125000086 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1292 4.77617552809 0 78.1380003 0.9179995499999976 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1293 3.40081490105 0 78.0779995 1.0080007499999937 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1294 3.33253479352 0 78.0814993 1.0027510499999934 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1295 3.24718776399 0 78.15799955 0.8880006750000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1296 4.82158183139 0 78.0450005 1.0574992499999993 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1297 4.42219194672 0 78.151999825 0.8970002624999935 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1298 3.70341103092 0 77.894499775 1.2832503375000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1299 4.98028095379 0 77.936000225 1.2209996624999988 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1300 2.8077544116 0 77.9090004 1.2614994000000053 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1301 3.96334183535 0 77.7869993 1.4445010499999924 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1302 3.18821757541 0 77.96200045 1.1819993249999925 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1303 2.32921384149 0 78.3434994 0.6097509000000016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1304 1.8683963672 0 78.375500975 0.5617485375000086 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1305 2.77391276825 0 78.1604992 0.8842511999999942 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1306 1.95369833697 0 78.2910009 0.6884986499999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1307 2.44133377904 0 78.239500475 0.7657492875000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1308 2.29217450437 0 78.24800045 0.7529993249999904 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1309 2.09572432924 0 78.159998775 0.885001837499992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1310 2.11514698074 0 78.38300025 0.5504996250000076 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1311 2.29217450437 0 78.30849875 0.6622518750000026 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1312 2.67240958712 0 78.24099945 0.7635008249999942 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1313 2.3901980036 0 78.1964996 0.8302506000000065 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1314 2.80948259281 0 78.196999525 0.8295007125000069 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1315 2.97779876864 0 77.499500075 1.8757498874999996 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1316 3.59736474838 0 77.560998975 1.783501537499994 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1317 3.70240971999 0 77.5669998 1.7745002999999926 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1318 2.98884796399 0 77.60950015 1.7107497749999965 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1319 2.97779876864 0 77.5099994 1.8600009000000028 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1320 3.70240971999 0 77.66250015 1.6312497750000006 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1321 1.72755765819 0 78.1795002 0.8557496999999898 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1322 2.17132451316 0 78.100500225 0.974249662499993 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1323 3.37566443263 0 77.740001125 1.5149983124999906 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1324 2.57346981923 0 78.0845013 0.9982480500000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1325 2.4337406276 0 77.568500025 1.7722499624999912 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1326 2.7795432921 0 77.8360002 1.3709996999999987 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1327 3.77591555364 0 77.44349995 1.9597500749999952 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1328 3.48520639466 0 78.0400009 1.0649986500000068 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1329 1.96561700982 0 77.7650008 1.4774988000000064 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1330 3.53240628779 0 77.519998525 1.8450022124999919 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1331 3.1738081787 0 77.63000035 1.6799994749999954 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1332 3.49454519355 0 77.520499075 1.8442513874999946 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1333 2.86683022571 0 77.6179995 1.6980007500000056 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1334 2.53160205673 0 77.963498775 1.1797518374999925 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1335 2.41443054714 0 77.595499575 1.7317506374999923 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1336 2.98884796399 0 77.4784996 1.9072505999999905 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1337 2.98396322778 0 77.7334995 1.5247507500000097 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1338 3.77591555364 0 77.599000525 1.7264992125000092 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1339 1.91009546227 0 77.440002 1.9649969999999897 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1340 2.27522664257 0 77.9180006 1.2479991000000012 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1341 3.22083792723 0 77.550500025 1.7992499624999923 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1342 3.45487014523 0 77.52699985 1.8345002250000064 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1343 2.70433982991 0 77.747000325 1.5044995124999971 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1344 3.49811600913 0 77.567999575 1.7730006374999974 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1345 3.58371362677 0 78.0034993 1.1197510499999979 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1346 2.48136969546 0 77.50999955 1.8600006749999949 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1347 2.2351185922 0 77.927000625 1.2344990624999923 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1348 3.4918718623 0 77.55899955 1.7865006750000063 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1349 2.85338933385 0 77.505499775 1.8667503374999939 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1350 3.02516396272 0 77.8040009 1.4189986499999918 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1351 3.02516396272 0 77.6645 1.6282499999999942 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1352 1.7496306648 0 78.080002 1.0049970000000101 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1353 3.28260543861 0 78.17100045 0.868499325000009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1354 3.45051734728 0 77.615999825 1.7010002624999956 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1355 2.98396322778 0 77.77050005 1.4692499250000068 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1356 2.72858886384 0 77.621999525 1.6920007124999898 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1357 2.76271987895 0 77.38550045 2.0467493250000075 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1358 2.6038089013 0 78.050499725 1.0492504125000082 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1359 2.72858886384 0 77.51250005 1.8562499250000002 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1360 2.6038089013 0 78.268500375 0.7222494374999968 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1361 3.89181483916 0 77.5769989 1.75950164999999 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1362 1.96279955207 0 77.769000475 1.4714992875000021 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1363 3.71194409149 0 77.4570005 1.9394992499999901 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1364 2.89409550228 0 77.564500175 1.7782497374999906 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1365 2.53864202538 0 77.92350025 1.2397496249999946 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1366 3.53240628779 0 77.6799991 1.6050013499999949 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1367 2.33816231791 0 77.336499475 2.120250787500005 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1368 4.48813937768 0 78.18300035 0.8504994749999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1369 4.87565925629 0 78.0275003 1.0837495500000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1370 3.26850208106 0 77.29099965 2.188500524999995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1371 3.52603923588 0 77.992000425 1.1369993624999992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1372 2.8077544116 0 77.916000525 1.2509992124999982 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1373 3.45138702919 0 77.977000025 1.159499962500007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1374 4.13446115525 0 77.988500575 1.142249137499995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1375 2.76355621145 0 77.767000175 1.4744997374999897 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1376 3.10050944598 0 78.0005005 1.1242492499999983 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1377 3.89292123452 0 77.795000075 1.4324998874999935 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1378 2.6666849235 0 77.0604998 2.5342502999999965 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1379 3.17822794726 0 77.944999725 1.2075004124999964 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1380 2.63995156218 0 78.0805 1.004249999999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1381 2.72831704128 0 78.0629995 1.0305007499999945 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1382 3.57575930671 0 77.562999725 1.780500412500004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1383 2.58610616022 0 78.158999825 0.8865002625000074 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1384 2.67371407651 0 77.774498925 1.4632516124999952 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1385 4.47713386599 0 78.0165001 1.1002498499999973 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1386 3.27045362948 0 77.94099985 1.2135002250000042 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1387 2.50108446251 0 78.3109989 0.658501649999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1388 3.03387706496 0 77.619498875 1.6957516874999925 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1389 5.09612029865 0 78.0029999 1.120500149999991 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1390 3.89292123452 0 77.82250045 1.3912493249999898 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1391 4.59198750865 0 78.076499925 1.0102501125000103 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1392 3.76294547016 0 77.885000625 1.2974990624999947 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1393 4.69591915468 0 78.0605003 1.0342495499999984 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1394 4.59661176964 0 78.0494995 1.050750750000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1395 3.35622156523 0 77.832000675 1.376998987499995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1396 2.52809669417 0 78.148000525 0.9029992124999993 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1397 4.33470175092 0 78.211999275 0.8070010875000051 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1398 3.22311222914 0 77.279000125 2.206499812500006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1399 3.90290717352 0 78.24649945 0.7552508249999974 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1400 2.76216260511 0 77.7119996 1.557000600000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1401 3.6515641406 0 77.9660004 1.175999400000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1402 2.77222805663 0 77.897500075 1.2787498875000054 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1403 2.87435224664 0 77.01950065 2.5957490250000035 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1404 3.41779112394 0 77.96299935 1.180500974999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1405 3.91238549312 0 77.8559998 1.3410002999999904 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1406 4.76952130091 0 78.0144995 1.1032507500000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1407 4.67427943592 0 78.152000275 0.896999587499991 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1408 2.39521498147 0 77.767500725 1.4737489124999925 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1409 3.76294547016 0 77.864499725 1.3282504124999974 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1410 4.36510071125 0 78.188500025 0.8422499625000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1411 3.27045362948 0 77.41499965 2.002500525000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1412 2.48971602595 0 77.766499475 1.475250787499995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1413 1.7855629355 0 77.799995 1.4250075000000066 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1414 4.66234332773 0 77.9869996 1.1445005999999935 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1415 4.52821122249 0 78.0979993 0.9780010500000031 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1416 4.27983318032 0 78.155000475 0.8924992875000086 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1417 2.9784464594 0 77.4359997 1.9710004500000053 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1418 2.84775205688 0 78.013999875 1.1040001875000058 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1419 4.50808625337 0 78.180500775 0.8542488375000019 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1420 4.33470175092 0 78.177999575 0.8580006374999982 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1421 2.27314934897 0 76.994500125 2.6332498124999972 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1422 3.05997821259 0 77.4870001 1.8944998499999954 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1423 4.98028095379 0 78.02849925 1.0822511250000062 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1424 4.79205397957 0 78.038500425 1.0672493625000072 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf1425 4.90008498647 0 78.083500225 0.9997496625000082 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_promise_confs_batch220_single.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_promise_confs_batch220_single.txt index 052c8b4438..970a2f6b4d 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_promise_confs_batch220_single.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_promise_confs_batch220_single.txt @@ -10,5841 +10,5841 @@ conf1 1 0 78.75 0 ----- +++++ conf1 2.01998340478 0 78.6258339 0.5241660999999965 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf2 2.3847866076 0 78.4283332 0.48250020000000404 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf3 2.70007394171 0 78.39416715 0.5337492749999981 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf4 2.40933283482 0 78.3983333 0.5275000499999933 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf5 2.47665743514 0 78.45166625 0.6983337499999976 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf6 2.39521498147 0 78.377499225 0.558751162500009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf7 2.03574912892 0 78.300833675 0.6737494874999896 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf8 1.66405270953 0 78.8608336 0.2891663999999935 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf9 2.12995187868 0 78.42249945 0.4912508249999945 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf10 2.12252361347 0 78.3466679 0.6049981500000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf11 2.06569084995 0 78.4441662 0.45875070000000306 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf12 3.14140798624 0 78.35166645 0.5975003250000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf13 2.24152130544 0 78.3425 0.6112499999999983 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf14 2.29313412528 0 78.239166025 0.7662509624999956 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf15 2.20258261036 0 78.458333525 0.6916664749999996 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf16 2.29217450437 0 78.475834475 0.6741655250000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf17 1.92405135546 0 78.35416605 0.5937509249999948 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf18 2.11514698074 0 78.5400008 0.6099991999999986 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf19 1.87210017687 0 78.447499675 0.45375048749999536 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf20 3.20613284165 0 78.374166925 0.5637496125000041 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf21 2.04639704852 0 78.4675 0.6824999999999989 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf22 2.96009700474 0 78.387500825 0.5437487624999946 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf23 3.369849424 0 78.32916615 0.6312507749999909 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf24 2.89256716615 0 78.33750065 0.6187490250000067 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf25 2.61874738541 0 78.4550001 0.6949998999999935 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf26 2.20258261036 0 78.57083385 0.5791661500000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf27 2.06257943045 0 78.325834275 0.6362485874999919 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf28 2.89256716615 0 78.3883325 0.5425012499999937 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf29 1.99206710006 0 78.3549999 0.5925001500000064 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf30 2.3343756992 0 78.341666825 0.6124997624999935 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf31 2.29217450437 0 78.297499675 0.6787504875000039 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf32 2.50154135982 0 78.3100002 0.6599996999999931 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf33 1.7977525785 0 78.7299997 0.42000030000000665 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf34 3.1306351361 0 78.3141674 0.6537488999999965 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf35 2.64377420873 0 78.43166655 0.47750017499999586 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf36 2.24152130544 0 78.42999965 0.48000052500000123 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf37 2.43050085973 0 78.462500325 0.6874996750000065 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf38 2.68419601838 0 78.230833275 0.7787500875000077 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf39 2.14833063686 0 78.574166375 0.5758336249999957 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf40 2.1432868672 0 78.3550005 0.5924992499999959 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf41 1.68452312305 0 78.73333 0.41667000000000487 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf42 1.65600317448 0 78.133331 0.9250035000000025 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf43 2.1923561844 0 78.4075006 0.5137490999999912 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf44 1.89424199196 0 78.535832925 0.614167075000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf45 3.42248933877 0 78.326665675 0.6350014874999985 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf46 1.8683963672 0 78.45916675 0.6908332500000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf47 1.94813942115 0 78.3724997 0.5662504499999912 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf48 1.95369833697 0 78.4008341 0.523748850000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf49 1.76031414454 0 78.810001 0.3399990000000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf50 3.542000097 0 78.274166525 0.7137502125000026 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf51 2.14664674472 0 78.50083355 0.6491664500000042 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf52 1.92405135546 0 78.580833475 0.5691665249999943 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf53 1.98628801958 0 78.225000025 0.7874999625000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf54 1.58691558324 0 78.433334 0.47499899999999684 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf55 1.99206710006 0 78.526666425 0.6233335749999981 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf56 1.58691558324 0 78.433334 0.47499899999999684 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf57 2.6038089013 0 78.41666655 0.5000001749999967 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf58 2.86533054339 0 78.429166875 0.4812496874999894 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf59 2.5635932838 0 78.394166475 0.5337502874999913 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf60 1.63231514248 0 78.133331 0.9250035000000025 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf61 2.07194193579 0 78.55666715 0.5933328500000045 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf62 2.59394423726 0 78.4616668 0.6883331999999968 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf63 2.44133377904 0 78.40999965 0.5100005249999953 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf64 2.31935108241 0 78.418333275 0.49750008750000774 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf65 2.28186154535 0 78.2866669 0.6949996499999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf66 1.88771378107 0 78.396666025 0.5300009624999973 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf67 3.28260543861 0 78.332499925 0.62625011250001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf68 2.93158007413 0 78.3800001 0.5549998499999944 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf69 2.24152130544 0 78.269999725 0.7200004124999921 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf70 2.17046411412 0 78.226666425 0.7850003624999928 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf71 2.3901980036 0 78.414167425 0.5037488624999966 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf72 2.20258261036 0 78.42083345 0.49374982499999476 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf73 1.79680900793 0 78.505833075 0.6441669250000018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf74 2.03862830664 0 78.6199985 0.5300015000000059 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf75 1.8569945332 0 78.56750045 0.5824995500000029 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf76 1.95369833697 0 78.539165875 0.6108341250000052 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf77 2.06569084995 0 78.4458326 0.4562510999999958 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf78 2.18361621336 0 78.551667025 0.5983329749999996 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf79 2.14664674472 0 78.43916705 0.466249425000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf80 1.92405135546 0 78.724165975 0.4258340249999947 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf81 3.24718776399 0 78.3708334 0.5687499000000074 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf82 2.83566297806 0 78.381667 0.5524995000000104 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf83 2.1510303661 0 78.436666475 0.47000028750000666 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf84 2.41124195235 0 78.447500425 0.4537493624999982 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf85 1.88123041257 0 78.49250045 0.6574995500000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf86 2.14664674472 0 78.5775003 0.572499700000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf87 1.79680900793 0 78.694167075 0.4558329250000043 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf88 2.02237001209 0 78.67083405 0.4791659500000037 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf89 1.82360897249 0 78.663332725 0.4866672749999964 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf90 2.18361621336 0 78.354166375 0.5937504374999918 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf91 1.68618253461 0 78.7716667 0.37833330000000276 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf92 3.14140798624 0 78.355833 0.5912504999999939 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf93 2.24152130544 0 78.4691669 0.6808330999999953 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf94 3.48564996739 0 78.33583295 0.6212505750000048 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf95 2.49016877877 0 78.4016665 0.5225002499999931 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf96 2.16634367244 0 78.366666775 0.5749998375000018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf97 1.60552156231 0 78.133339 0.9249914999999902 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf98 2.48926343774 0 78.3775003 0.5587495500000088 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf99 3.11142905832 0 78.33083305 0.6287504250000069 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf100 3.08305776402 0 78.398333325 0.5275000125000062 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf101 2.59640338923 0 78.502499925 0.647500075000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf102 2.36440403604 0 78.4100001 0.5099998499999927 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf103 3.06545828495 0 78.413333375 0.5049999375000098 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf104 1.561580129 0 78.366661 0.5750085000000098 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf105 2.10377358491 0 78.574166025 0.5758339750000033 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf106 3.28260543861 0 78.339167025 0.6162494625000079 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf107 2.51763860959 0 78.321666775 0.6424998375000044 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf108 2.24152130544 0 78.37416705 0.5637494250000046 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf109 1.88123041257 0 78.30083255 0.6737511750000067 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf110 1.99206710006 0 78.47083355 0.6791664500000053 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf111 2.67606536954 0 78.4125008 0.5062487999999945 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf112 1.63231514248 0 78.0 1.125 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf113 2.16360539179 0 78.3991667 0.5262499500000075 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf114 2.3343756992 0 78.379999675 0.5550004875000099 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf115 2.23420690358 0 78.490833225 0.6591667749999971 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf116 2.96586709623 0 78.414166075 0.5037508875000043 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf117 2.58903988183 0 78.322500425 0.6412493624999982 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf118 3.15989298643 0 78.379166025 0.5562509624999947 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf119 2.59492733884 0 78.2433332 0.7600002000000075 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf120 3.02884415997 0 78.363333675 0.5799994874999896 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf121 2.80948259281 0 78.390000625 0.5399990625000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf122 2.07194193579 0 78.608333025 0.5416669750000068 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf123 2.0051937949 0 78.435834225 0.47124866250000963 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf124 1.561580129 0 78.333336 0.6249959999999959 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf125 2.10636242153 0 78.525833925 0.6241660749999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf126 2.66590618726 0 77.763333475 1.479999787500006 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf127 3.02516396272 0 77.884999975 1.2975000375000008 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf128 2.27522664257 0 78.025831825 1.086252262500004 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf129 2.76271987895 0 77.709167625 1.5612485624999906 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf130 3.40081490105 0 77.92333335 1.2399999750000106 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf131 2.79400512644 0 78.079165625 1.0062515624999975 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf132 3.58371362677 0 78.24749945 0.7537508249999902 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf133 2.16275109817 0 77.8516661 1.3475008499999959 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf134 3.02516396272 0 78.091666625 0.987500062499997 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf135 2.51763860959 0 78.29666675 0.679999875 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf136 2.98884796399 0 77.69749905 1.578751424999993 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf137 2.85338933385 0 77.795000675 1.4324989875000043 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf138 2.38915544409 0 77.653334675 1.6449979875000054 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf139 2.24538057124 0 77.54916775 1.8012483750000072 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf140 3.70240971999 0 78.0641668 1.028749800000007 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf141 3.542000097 0 78.251666625 0.7475000625000021 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf142 2.81381236756 0 77.702501075 1.571248387499999 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf143 2.16275109817 0 77.61916605 1.696250924999994 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf144 2.17132451316 0 78.177498975 0.858751537499991 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf145 2.14731998471 0 77.715833625 1.5512495624999971 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf146 3.30478754695 0 77.8433334 1.3599998999999912 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf147 2.89409550228 0 77.705832625 1.566251062500001 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf148 3.11638574781 0 77.875000025 1.3124999624999916 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf149 2.27126418908 0 77.726667275 1.5349990875000046 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf150 1.540499209 0 78.299995 0.6750075000000066 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf151 2.9094681628 0 78.252499575 0.7462506374999975 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf152 2.29505577961 0 77.8708336 1.3187496000000039 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf153 3.84266973008 0 77.9358333 1.2212500500000019 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf154 2.73950635808 0 77.739999725 1.5150004124999938 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf155 3.14863117051 0 77.776665475 1.4600017874999978 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf156 2.75882367755 0 78.10583375 0.9662493749999967 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf157 1.60552156231 0 77.333336 2.124995999999996 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 35 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 35 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf158 2.17563676084 0 77.3866672 2.0449991999999924 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf159 3.09001480855 0 78.13416635 0.9237504749999985 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf160 1.96279955207 0 78.240832825 0.7637507625000026 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf161 2.34818605648 0 77.6583338 1.6374993000000089 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf162 3.22083792723 0 77.988334125 1.1424988125000084 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf163 2.41549528692 0 77.5875 1.7437499999999915 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf164 2.6215047202 0 77.724999925 1.5375001124999912 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf165 1.61737349131 0 77.26667 2.2249949999999927 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 35 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 35 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf166 1.8683963672 0 78.435000275 0.4724995875000104 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf167 2.83566297806 0 78.3624994 0.5812508999999935 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf168 2.45908953163 0 78.142500675 0.9112489875000094 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf169 2.78803206719 0 77.6300002 1.6799997000000033 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf170 2.34818605648 0 77.793333025 1.4350004625000068 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf171 3.30838052095 0 78.097500625 0.9787490625000075 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf172 2.68946786964 0 77.6158327 1.7012509500000021 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf173 2.89409550228 0 77.7600006 1.4849991000000031 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf174 2.74472283119 0 77.914166025 1.2537509624999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf175 3.18710451669 0 78.14500085 0.9074987249999964 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf176 2.29428673152 0 78.021665925 1.0925011125000097 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf177 2.93786958446 0 77.71416625 1.5537506249999922 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf178 2.00828162094 0 77.7725002 1.4662497000000059 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf179 3.25529306887 0 77.90166585 1.2725012249999992 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf180 2.72858886384 0 77.793332925 1.4350006124999979 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf181 2.16104453321 0 77.90333325 1.2700001249999957 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf182 2.17823234081 0 77.578333675 1.7574994875000058 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf183 2.5540304249 0 78.0050003 1.117499549999991 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf184 2.86683022571 0 78.04083325 1.0637501249999914 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf185 1.74073411989 0 77.433342 1.9749870000000058 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf186 3.11638574781 0 77.655000325 1.6424995124999953 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf187 3.18710451669 0 77.884166 1.29875100000001 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf188 1.62843286633 0 78.266663 0.7250055000000089 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf189 2.20915635682 0 78.098333725 0.977499412499995 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf190 2.54477507927 0 78.166666375 0.8750004374999918 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf191 2.51763860959 0 78.17833355 0.8574996749999926 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf192 3.06580144126 0 77.67583365 1.611249524999998 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf193 3.53925369518 0 78.084999 0.9975015000000056 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf194 2.4337406276 0 77.8741665 1.3137502499999982 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf195 3.72963656481 0 77.89333295 1.285000574999998 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf196 2.16275109817 0 77.9108336 1.2587495999999945 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf197 2.84007388684 0 78.249166825 0.7512497624999952 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf198 3.08305776402 0 78.3541669 0.5937496500000066 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf199 1.67627481734 0 77.300003 2.1749954999999943 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf200 3.84266973008 0 77.789166675 1.4412499874999938 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf201 2.00445999726 0 77.79916735 1.426248974999993 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf202 3.24180661389 0 78.29750025 0.6787496250000018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf203 2.5540304249 0 78.192500725 0.8362489124999968 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf204 2.64914454991 0 78.190833575 0.8387496375000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf205 3.59736474838 0 78.001666825 1.1224997624999986 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf206 2.00445999726 0 77.709167125 1.56124931250001 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf207 2.23238575577 0 77.580833325 1.7537500124999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf208 2.65814376706 0 78.244999925 0.7575001124999972 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf209 2.97779876864 0 77.65083345 1.64874982500001 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf210 3.42248933877 0 78.273333075 0.7150003875000053 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf211 2.98396322778 0 78.078332875 1.0075006874999985 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf212 3.13457659127 0 77.9049995 1.2675007499999964 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf213 2.20630884648 0 77.829166975 1.3812495374999898 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf214 2.35242170136 0 78.25166715 0.7474992749999956 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf215 3.48520639466 0 78.25583275 0.7412508750000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf216 3.11638574781 0 77.74666595 1.5050010750000098 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf217 3.39701846598 0 78.02666625 1.0850006249999922 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf218 2.23238575577 0 77.995833575 1.131249637499991 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf219 2.09813263107 0 77.979165975 1.1562510374999988 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf220 2.98396322778 0 77.924999225 1.2375011625000099 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf221 2.44242238448 0 77.7983329 1.4275006499999918 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf222 2.66590618726 0 77.830834025 1.3787489624999978 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf223 2.43612195203 0 78.434167675 0.47374848750000353 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf224 3.26928242088 0 77.946667175 1.2049992374999974 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf225 2.59640338923 0 78.41833385 0.49749922500000565 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf226 1.94883255504 0 78.098333575 0.9774996375000029 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf227 2.01775107059 0 77.572500275 1.7662495875000062 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf228 2.64786391927 0 77.929166775 1.2312498375000018 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf229 2.46129952706 0 77.86750025 1.3237496249999907 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf230 1.92906574394 0 77.800833175 1.423750237500009 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf231 2.17132451316 0 78.174166425 0.8637503625000065 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf232 2.93786958446 0 77.677500025 1.6087499624999992 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf233 2.11107096401 0 77.924165825 1.2387512624999957 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf234 3.35910310464 0 78.12250065 0.9412490249999905 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf235 2.24980746522 0 77.8475005 1.353749250000007 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf236 2.89104044336 0 77.789999725 1.440000412499998 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf237 2.48136969546 0 77.885832625 1.2962510624999908 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf238 1.74018108892 0 77.795832775 1.4312508375000093 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 35 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 35 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf239 2.07523892514 0 77.334167475 2.1237487874999985 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf240 3.59736474838 0 78.0583328 1.0375007999999966 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf241 1.66455840456 0 77.200005 2.3249924999999934 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 35 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 35 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf242 3.59736474838 0 77.869166975 1.3212495375000017 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf243 2.71748837209 0 78.194166325 0.8337505125000035 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf244 2.4524832936 0 77.79416675 1.4337498749999966 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf245 1.80975096023 0 77.8899994 1.2900009000000097 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf246 3.64088439105 0 77.9983331 1.1275003500000054 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf247 1.99424293451 0 77.23333 2.2750050000000073 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf248 2.92188437688 0 77.82999985 1.3800002250000105 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf249 3.36364124845 0 78.208333175 0.8125002374999895 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf250 1.69337635738 0 77.699997 1.5750045000000057 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf251 2.3912414734 0 77.454999025 1.9425014624999903 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf252 2.2351185922 0 77.888333 1.2925004999999956 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf253 2.72858886384 0 77.5358331 1.8212503499999926 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf254 2.22694008233 0 77.8283333 1.3825000500000044 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf255 2.19569574585 0 77.805000325 1.417499512500008 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf256 3.01849555693 0 77.5833332 1.7500002000000023 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf257 2.19569574585 0 77.389999575 2.0400006374999933 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf258 2.01775107059 0 77.844166425 1.358750362500004 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf259 2.7795432921 0 78.004999925 1.1175001124999895 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf260 2.16446036058 0 78.245833625 0.7562495624999954 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf261 2.58050173724 0 77.935832575 1.2212511374999906 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf262 2.90329954283 0 77.738334525 1.5174982125000014 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf263 3.45051734728 0 77.903333475 1.269999787500005 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf264 3.24411063565 0 78.197500075 0.8287498875000097 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf265 2.83830787766 0 77.790834 1.4387489999999943 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf266 2.04182004566 0 78.04249965 1.0612505250000055 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf267 2.36542510121 0 77.42583335 1.9862499749999927 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf268 2.24980746522 0 77.799999375 1.425000937500002 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf269 1.89227879259 0 77.91749935 1.248750975 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf270 1.88123041257 0 78.80333275 0.3466672500000044 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf271 3.07993730408 0 78.325000075 0.6374998874999918 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf272 2.7795432921 0 78.15666565 0.8900015250000095 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf273 2.24796080644 0 77.7641655 1.4787517499999936 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf274 2.25629473251 0 77.3750008 2.0624988000000073 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf275 3.33253479352 0 78.373332825 0.5650007624999915 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf276 2.48136969546 0 77.792499675 1.436250487499997 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf277 3.46799468161 0 77.8525001 1.3462498499999995 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf278 2.75161695447 0 78.2841667 0.6987499499999998 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf279 2.78803206719 0 77.8383327 1.3675009500000073 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf280 3.06580144126 0 77.6600006 1.6349990999999946 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf281 2.95307508795 0 77.830833225 1.3787501624999905 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf282 2.0579298361 0 77.759999325 1.4850010125000068 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 35 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 35 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf283 3.44574192026 0 78.287500025 0.6937499625000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf284 2.16275109817 0 77.733333075 1.5250003874999933 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf285 3.72963656481 0 77.990000175 1.1399997374999913 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf286 2.0051937949 0 78.415000175 0.5024997374999955 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf287 2.07823096988 0 77.93916665 1.2162500249999937 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf288 1.96561700982 0 77.73000035 1.5299994750000039 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf289 2.29505577961 0 77.8333338 1.3749992999999918 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf290 3.66721064524 0 77.8116667 1.4074999499999947 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf291 3.82389130321 0 77.93 1.2299999999999898 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf292 2.44460251046 0 78.28166635 0.7025004750000079 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf293 2.15594095941 0 77.4183335 1.9974997499999958 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf294 2.4337406276 0 77.829165775 1.3812513374999895 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf295 2.98884796399 0 77.9175001 1.248749850000003 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf296 2.27522664257 0 78.02666625 1.0850006249999922 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf297 2.83419571826 0 78.140833925 0.9137491125000068 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf298 2.98104004245 0 78.109166525 0.9612502124999907 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf299 2.22694008233 0 77.81166655 1.4075001750000027 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf300 3.59076159103 0 77.999166425 1.1262503625000022 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf301 1.59477017142 0 78.299995 0.6750075000000066 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf302 2.99538587737 0 78.17166735 0.8674989749999895 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf303 2.24980746522 0 78.02500095 1.0874985749999908 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf304 3.82389130321 0 77.728333225 1.5325001624999999 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf305 3.06580144126 0 77.6491664 1.6512504000000021 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf306 2.6708458791 0 78.36916735 0.5712489750000032 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf307 1.75142089738 0 78.28416715 0.6987492749999973 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf308 2.63512538337 0 78.245000275 0.757499587500007 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf309 2.22694008233 0 77.810833725 1.4087494125000077 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf310 2.90329954283 0 77.685000025 1.5974999625000095 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf311 3.48564996739 0 78.304166975 0.6687495374999983 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf312 2.46129952706 0 77.8291661 1.3812508500000078 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf313 3.0234941006 0 77.720000225 1.5449996624999898 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf314 2.78803206719 0 77.630832525 1.6787512124999964 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf315 1.83153605016 0 78.329999325 0.6300010124999957 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf316 2.98396322778 0 77.99583315 1.1312502750000064 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf317 2.98396322778 0 77.8950001 1.2824998499999936 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf318 3.45051734728 0 78.096665625 0.9800015625 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf319 2.87133870651 0 77.78500025 1.447499625000006 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf320 2.85338933385 0 77.722500575 1.541249137500003 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf321 3.49811600913 0 78.051667525 1.0474987125000013 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf322 3.11638574781 0 77.666666675 1.6249999874999972 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf323 3.26928242088 0 77.822500225 1.3912496625000017 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf324 3.77643621697 0 77.9308329 1.228750650000002 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf325 3.13457659127 0 78.079166175 1.0062507375000038 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf326 2.52111501064 0 77.8916662 1.2875006999999954 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf327 2.3901980036 0 78.4191667 0.49624994999999217 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf328 2.5540304249 0 77.8974997 1.278750450000004 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf329 2.08376527652 0 78.076667975 1.0099980374999902 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf330 2.82425847867 0 77.597498625 1.7287520625 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf331 2.95498685009 0 78.008332425 1.1125013624999909 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf332 3.66721064524 0 77.970833175 1.1687502375000065 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf333 2.24980746522 0 77.759166525 1.4862502125000034 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf334 2.11760015464 0 77.669999875 1.620000187499997 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf335 2.50405728196 0 77.708333375 1.5624999375000073 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf336 3.71194409149 0 77.987499775 1.1437503374999949 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf337 2.70433982991 0 77.603333275 1.7200000875000043 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf338 2.6262810908 0 77.976667425 1.1599988624999966 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf339 2.25350989972 0 77.9949997 1.132500450000009 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf340 3.35910310464 0 77.893333275 1.285000087499995 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf341 3.77591555364 0 77.8466659 1.3550011499999925 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf342 3.63363848948 0 77.6966664 1.580000400000003 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf343 2.95498685009 0 78.0666676 1.0249985999999964 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf344 2.97779876864 0 77.59999905 1.7250014250000092 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf345 3.63363848948 0 77.896666575 1.2800001375000036 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf346 3.01849555693 0 77.669999725 1.620000412500005 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf347 1.69337635738 0 78.233337 0.7749944999999911 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf348 3.45487014523 0 78.031666225 1.0775006625000074 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf349 2.99342150206 0 77.65666695 1.6399995749999974 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf350 3.05895287958 0 77.964999825 1.17750026249999 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf351 3.30478754695 0 77.97250065 1.166249024999999 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf352 3.16135199481 0 77.633332675 1.6750009874999918 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf353 2.80373024506 0 78.070832475 1.0187512874999953 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf354 2.09813263107 0 78.0083335 1.1124997499999907 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf355 3.19453962237 0 77.770000625 1.4699990625000083 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf356 2.9094681628 0 77.677500475 1.6087492874999967 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf357 2.64786391927 0 77.63583395 1.6712490749999915 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf358 2.17823234081 0 77.741666975 1.5124995374999983 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf359 2.1432868672 0 78.15833395 0.8874990750000009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf360 2.11514698074 0 78.479999675 0.6700003249999981 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf361 1.90015610771 0 77.813332925 1.4050006125000039 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf362 2.99538587737 0 78.02499965 1.087500525000003 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf363 2.48881101405 0 78.280000425 0.7049993625000042 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf364 1.85888516523 0 77.5 1.875 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf365 3.71194409149 0 77.8299993 1.3800010500000042 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf366 2.08281444583 0 77.933333425 1.224999862499999 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf367 2.66590618726 0 78.1133333 0.9550000500000095 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf368 2.24888375674 0 77.844165625 1.3587515624999966 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf369 3.37566443263 0 78.118332875 0.9475006875000105 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf370 3.89181483916 0 77.944167275 1.2087490875000029 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf371 2.3343756992 0 78.34333455 0.6099981750000083 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf372 2.73950635808 0 77.709167125 1.56124931250001 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 31 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf373 2.73376380311 0 77.810000475 1.409999287500007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf374 2.57467948341 0 77.89083425 1.2887486250000038 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf375 1.89424199196 0 78.485832075 0.6641679249999953 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf376 3.49454519355 0 77.8066671 1.4149993500000022 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf377 1.76439678846 0 77.765000375 1.4774994375000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 35 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 35 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf378 2.04029892443 0 77.970833925 1.1687491125000093 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf379 3.49454519355 0 77.930000375 1.2299994374999912 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf380 3.24718776399 0 78.372501025 0.566248462499992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf381 3.11638574781 0 77.618333675 1.6974994874999965 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf382 2.57467948341 0 78.11749985 0.9487502249999977 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf383 3.01849555693 0 77.804999725 1.4175004124999973 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf384 2.40299088586 0 77.9716665 1.1675002500000033 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf385 3.20613284165 0 78.3483334 0.602499899999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf386 2.89104044336 0 77.71583315 1.5512502750000081 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf387 2.72018747284 0 78.090000325 0.9899995124999919 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf388 3.06580144126 0 77.6383333 1.667500050000001 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf389 2.39458670776 0 78.169167125 0.871249312499998 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf390 3.25529306887 0 78.100000275 0.974999587500001 -1 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf391 2.11694541871 0 78.165834275 0.8762485875000081 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf392 2.0941218638 0 77.7858332 1.4462502000000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf393 3.19453962237 0 77.5908335 1.7387497499999967 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf394 2.03272718838 0 77.99666555 1.1300016749999955 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf395 3.16135199481 0 77.81666755 1.399998674999992 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf396 1.9453718091 0 78.045000025 1.0574999625000103 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf397 3.30838052095 0 77.929999925 1.2300001124999937 -1 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf398 1.95369833697 0 78.654167425 0.4958325750000029 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf399 2.91566305143 0 78.091666425 0.9875003625000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf400 2.79400512644 0 77.817500475 1.3987492874999958 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf401 4.42219194672 0 78.2383339 0.767499149999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf402 3.01949394303 0 77.418333025 1.9975004625000068 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf403 3.1705014109 0 77.385833225 2.0462501625000016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf404 4.26782827201 0 78.063334375 1.029998437500005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf405 2.7974298081 0 77.765000075 1.4774998874999952 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf406 4.66234332773 0 78.051667025 1.0474994624999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf407 3.44574192026 0 78.334166525 0.6237502124999992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf408 3.91238549312 0 77.97583325 1.1612501250000093 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf409 5.09612029865 0 78.014167025 1.1037494624999908 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf410 2.57346981923 0 77.40749955 2.0137506750000043 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf411 2.79087319279 0 77.4591663 1.9362505499999898 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf412 2.24225538055 0 77.60666765 1.7149985249999915 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf413 4.35330525145 0 78.2025004 0.8212493999999921 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf414 3.6355679084 0 78.34750005 0.6037499250000096 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf415 2.09813263107 0 77.782499825 1.4512502624999968 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf416 3.69591437391 0 77.8908339 1.288749149999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf417 3.70341103092 0 77.961666825 1.182499762500008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf418 4.50808625337 0 78.152499875 0.8962501874999944 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf419 3.6350853616 0 77.693333625 1.584999562500009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf420 3.4088575296 0 77.734167475 1.52374878749999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf421 2.84775205688 0 77.76833325 1.4725001250000034 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf422 3.42248933877 0 78.276666475 0.7100002875000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf423 2.63995156218 0 78.009166925 1.1112496124999964 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf424 3.14537663121 0 77.1300003 2.429999549999991 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf425 4.43580061952 0 78.145833225 0.9062501624999939 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf426 3.29564085979 0 78.164166575 0.8787501375000062 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf427 3.36364124845 0 78.29666615 0.6800007750000105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf428 3.89292123452 0 77.83166695 1.3774995750000016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf429 2.32921384149 0 77.6116661 1.7075008500000095 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf430 1.99715145162 0 78.1408336 0.9137496000000098 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf431 2.38209008774 0 77.344166925 2.108749612500006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf432 5.01218173084 0 78.131665725 0.9275014124999927 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf433 3.42634295097 0 77.703333125 1.5700003124999995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf434 3.96334183535 0 77.94666705 1.204999424999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf435 2.76355621145 0 77.754999925 1.4925001124999895 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf436 3.542000097 0 78.261667625 0.7324985624999982 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf437 3.55210680761 0 77.912499025 1.2562514624999963 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf438 4.60279195815 0 78.219165625 0.7962515624999966 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf439 3.78112865648 0 77.954999725 1.19250041250001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf440 3.58371362677 0 78.213333575 0.8049996375000106 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf441 1.95648970844 0 77.958332825 1.187500762500001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf442 3.87145936777 0 78.21166725 0.8074991249999925 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf443 2.51025423632 0 77.799999 1.4250015000000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf444 2.6313276887 0 77.058333425 2.537499862499999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf445 3.09350511825 0 78.15583305 0.8912504250000026 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf446 3.23338646124 0 77.3483334 2.102499899999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf447 4.59661176964 0 78.16250005 0.8812499249999917 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf448 2.10636242153 0 77.625834225 1.6862486624999917 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf449 3.28615031572 0 77.718333425 1.547499862500004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf450 2.06631426065 0 78.1050001 0.9674998500000029 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf451 2.92157268012 0 77.90666715 1.2649992749999939 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf452 2.76216260511 0 77.796666575 1.430000137499995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf453 3.22311222914 0 77.316665925 2.150001112500007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf454 3.12848942641 0 77.253333325 2.2450000125000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf455 4.59198750865 0 78.0950007 0.9824989500000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf456 3.43063493306 0 78.29000005 0.6899999249999951 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf457 3.1705014109 0 77.2558326 2.2412510999999924 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf458 2.57346981923 0 78.114167225 0.9537491624999959 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf459 4.05728597619 0 78.259167625 0.7362485624999948 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf460 3.17822794726 0 78.03166725 1.0774991250000028 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf461 4.13446115525 0 78.10249875 0.9712518750000072 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf462 3.07993730408 0 78.222501075 0.7912483875000049 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf463 3.31960879381 0 77.75333405 1.4949989249999902 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf464 3.06923723471 0 78.10749925 0.9637511249999946 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf465 3.15989298643 0 78.4099997 0.5100004499999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf466 1.88381840849 0 78.068333425 1.0224998624999913 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf467 2.14345461127 0 77.783333975 1.4499990374999925 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf468 3.5433748969 0 78.10083375 0.9737493749999899 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf469 2.48069541785 0 77.953334375 1.1949984375000042 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf470 2.61724582801 0 78.066667275 1.0249990874999995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf471 3.13780880773 0 77.38166675 2.0524998750000094 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf472 3.14537663121 0 78.0433336 1.0599996000000047 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf473 2.94386966745 0 77.93666765 1.219998524999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf474 4.87565925629 0 78.070832475 1.0187512874999953 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf475 2.37877965366 0 77.91666685 1.2499997250000021 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf476 2.37444864695 0 77.638333925 1.6674991125000034 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf477 3.11780484681 0 77.73833275 1.5175008750000032 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf478 2.35242170136 0 78.116666425 0.950000362499992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf479 2.29948783073 0 77.70083405 1.5737489250000039 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf480 4.39310849558 0 78.1566664 0.890000399999991 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf481 2.98396322778 0 78.04500105 1.0574984250000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf482 2.67371407651 0 77.9333336 1.2249996000000039 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf483 3.13565325662 0 77.197499825 2.3287502625000087 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf484 2.6262810908 0 78.095832075 0.9812518874999938 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf485 3.14140798624 0 78.30083365 0.6737495249999981 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf486 3.76708619178 0 77.969999475 1.1700007875000082 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf487 4.28586099646 0 78.05083425 1.0487486250000089 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf488 4.21982434853 0 78.197500425 0.8287493624999982 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf489 4.47713386599 0 78.158333425 0.8874998625000075 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf490 2.58610616022 0 78.057499875 1.0387501874999927 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf491 2.26900612048 0 77.415832725 2.001250912499998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf492 3.02482984275 0 77.2850007 2.1974989500000035 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf493 1.67627481734 0 77.466667 1.9249994999999984 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf494 3.40081490105 0 78.062499825 1.031250262499995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf495 4.76952130091 0 78.081666625 1.0025000625000047 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf496 3.82870249017 0 77.916667575 1.249998637499992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf497 4.50808625337 0 78.165832375 0.8762514375000094 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf498 4.58200012548 0 78.2566666 0.740000099999996 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf499 3.26850208106 0 77.291665975 2.187501037499999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf500 3.27045362948 0 77.225833075 2.2862503875000044 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf501 3.93656682897 0 78.18666705 0.8449994250000046 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf502 2.60281905984 0 77.33750125 2.118748124999996 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf503 1.98599994561 0 77.3616673 2.08249905000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf504 3.369849424 0 78.344999875 0.6075001875000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf505 3.99339287342 0 78.27666705 0.7099994249999995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf506 2.6537650949 0 77.456666975 1.9399995374999932 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf507 3.50662601431 0 78.01749995 1.098750074999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf508 4.07904045576 0 78.19666655 0.830000174999995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf509 3.96334183535 0 77.91833225 1.247501624999991 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf510 2.95307508795 0 78.20333425 0.8199986250000038 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf511 3.6583930271 0 77.99583395 1.1312490749999924 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf512 4.37206912378 0 78.210834 0.8087489999999917 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf513 3.11142905832 0 78.360000425 0.5849993625000067 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf514 4.49698282055 0 78.049167125 1.0512493125000049 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf515 3.05997821259 0 77.234166475 2.2737502875000075 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf516 4.06752993595 0 78.2175 0.7987499999999983 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf517 4.69511411122 0 77.966666725 1.174999912500006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf518 2.48926343774 0 78.28166715 0.7024992749999939 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf519 3.05997821259 0 78.077499525 1.0087507125000101 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf520 4.44155567719 0 78.173332975 0.8650005375000092 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf521 3.1270605866 0 78.37666665 0.5600000249999937 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf522 3.03387706496 0 77.76833345 1.4724998249999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf523 1.93165087062 0 77.835000775 1.3724988375000038 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf524 3.52603923588 0 78.253333575 0.7449996375000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf525 4.623773346 0 78.157498675 0.888751987500001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf526 2.89378970586 0 77.949999975 1.2000000375000042 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf527 4.89920842557 0 78.030833225 1.0787501625000075 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf528 3.27045362948 0 78.001666775 1.1224998374999942 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf529 4.36510071125 0 78.22416655 0.7887501749999899 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf530 3.98815916413 0 78.033333725 1.0749994124999915 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf531 3.31800009086 0 77.3091659 2.1612511500000053 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf532 2.94070867727 0 77.4541666 1.9437501000000097 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf533 3.5433748969 0 78.108332725 0.9625009125000048 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf534 2.26300649679 0 77.59666755 1.7299986749999903 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf535 2.7974298081 0 77.9049997 1.267500449999993 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf536 2.59985404578 0 77.8508327 1.348750950000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf537 2.32921384149 0 78.499166175 0.6508338250000009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf538 3.20613284165 0 78.336666825 0.619999762500008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf539 4.00390160999 0 78.094167025 0.9837494624999934 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf540 4.82158183139 0 78.18749925 0.8437511249999972 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf541 2.51025423632 0 77.735833825 1.5212492624999996 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf542 4.42219194672 0 78.2124997 0.8062504500000074 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf543 3.6583930271 0 78.06416625 1.0287506250000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf544 2.43006954226 0 77.7833328 1.450000800000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf545 3.08305776402 0 78.35250035 0.5962494750000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf546 2.55141313335 0 78.160000425 0.8849993624999897 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf547 3.76708619178 0 78.090000175 0.9899997374999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf548 2.03862830664 0 78.6366662 0.5133338000000066 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf549 3.78112865648 0 77.9474998 1.2037502999999958 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf550 4.60279195815 0 78.17666665 0.860000024999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf551 2.85576960676 0 77.863332975 1.3300005374999913 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf552 3.13565325662 0 78.11416675 0.9537498750000069 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf553 3.6399166016 0 78.323333375 0.6399999374999936 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf554 3.52150537634 0 77.719999475 1.5450007875000082 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf555 3.55210680761 0 77.904999525 1.2675007125000093 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf556 3.33253479352 0 78.405833175 0.5162502375000031 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf557 2.29948783073 0 77.8283334 1.382499899999992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf558 2.98396322778 0 77.880832825 1.3037507625000018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf559 3.91238549312 0 77.929998975 1.2300015374999944 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf560 3.69591437391 0 77.944999875 1.2075001875000098 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf561 3.12527637116 0 77.4366663 1.9700005500000017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf562 2.76355621145 0 77.5766677 1.7599984499999977 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf563 3.82870249017 0 77.8450003 1.357499550000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf564 4.98028095379 0 78.07416665 1.0137500250000073 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf565 3.06923723471 0 77.241666175 2.262500737499991 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf566 3.24411063565 0 78.2841675 0.6987487500000071 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf567 4.2459932947 0 78.1333334 0.9249999000000031 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf568 3.20200949945 0 78.170832425 0.8687513624999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf569 3.89292123452 0 77.909999275 1.2600010874999938 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf570 2.42920736643 0 78.20916705 0.8112494249999926 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf571 3.31960879381 0 77.9474999 1.2037501500000047 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf572 4.77617552809 0 78.180000725 0.854998912500001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf573 3.17822794726 0 77.11833265 2.447501025000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf574 1.85623935679 0 77.859166525 1.3362502124999907 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf575 3.35622156523 0 77.91166665 1.2575000249999988 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf576 2.06413396769 0 78.079999725 1.00500041250001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf577 2.57952953365 0 78.0558332 1.0412502000000075 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf578 2.55784699441 0 78.1500002 0.8999997000000093 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf579 3.48520639466 0 78.24666575 0.755001374999992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf580 4.04291144613 0 78.278333075 0.7075003874999908 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf581 2.47777777778 0 77.960000325 1.1849995125000063 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf582 2.79685844492 0 78.4041662 0.5187506999999911 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf583 3.12527637116 0 77.344999475 2.1075007875000082 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 4 -3 gpu conv samp 34 add fp32 1 tanh fp32 1 +3 gpu conv samp 34 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- +++++ conf584 4.79877127275 0 78.166665675 0.8750014874999934 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 6 -6 gpu mul fp16 1 add fp32 1 -7 gpu softmax fp32 1 +6 gpu mul fp16 1 add fp16 1 +7 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_confs_batch220.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_confs_batch220.txt index cb1dd991ec..50e026a7bf 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_confs_batch220.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_confs_batch220.txt @@ -1,1771 +1,1771 @@ +++++ conf1 1 0 84.76 0 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 +1 gpu conv fp32 1 add fp32 1 tanh fp32 1 +2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +3 gpu conv fp32 1 add fp32 1 tanh fp32 1 +4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +5 gpu conv fp32 1 add fp32 1 tanh fp32 1 +6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +7 gpu mul fp32 1 add fp32 1 8 gpu softmax fp32 1 ----- +++++ conf1 1.9038241963 0 84.979996 0.18000400000000527 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf2 1.64842307895 0 84.680008 0.4799920000000043 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf3 1.72558649193 0 84.500008 0.6599920000000111 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf4 1.82944424391 0 84.279999 0.7200015000000022 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf5 1.82944424391 0 84.199997 0.8400045000000134 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf6 1.64842307895 0 84.139999 0.930001500000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf7 1.72558649193 0 84.919998 0.24000199999999838 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf8 1.79160865678 0 85.259995 -0.09999499999999839 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf9 1.82944424391 0 84.400002 0.5399970000000067 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf10 1.70856970404 0 85.300003 -0.14000299999999866 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf11 1.64842307895 0 84.12001 0.9599850000000174 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf12 1.82944424391 0 84.339996 0.6300060000000087 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf13 1.81032878247 0 84.259995 0.7500075000000024 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf14 1.64842307895 0 84.259995 0.7500075000000024 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf15 1.9038241963 0 84.900002 0.2599980000000045 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf16 1.82944424391 0 84.400002 0.5399970000000067 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf17 1.74294565288 0 84.139999 0.930001500000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf18 1.88313156795 0 84.159996 0.9000059999999976 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf19 1.66425732228 0 85.220001 -0.0600009999999912 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf20 1.63288729942 0 84.259995 0.7500075000000024 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf21 1.74294565288 0 84.559998 0.600002000000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf22 1.64842307895 0 84.279999 0.7200015000000022 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf23 1.72558649193 0 84.119995 0.9600075000000032 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf24 1.72558649193 0 85.080002 0.07999800000001189 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf25 1.82944424391 0 84.18 0.8699999999999974 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf26 1.79160865678 0 84.220001 0.8099985000000132 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf27 1.70856970404 0 85.699997 -0.5399969999999911 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf28 1.81032878247 0 84.400002 0.5399970000000067 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf29 1.70856970404 0 85.479996 -0.31999599999999473 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf30 1.72558649193 0 84.120003 0.9599955000000122 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf31 1.79160865678 0 84.18 0.8699999999999974 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf32 1.9038241963 0 84.82 0.34000000000001196 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf33 1.82944424391 0 84.259995 0.7500075000000024 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf34 1.64842307895 0 85.280006 -0.12000599999999506 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf35 1.73481954522 0 83.559998 1.800003000000018 -1 gpu conv perf 30 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf36 1.70856970404 0 83.800003 1.439995500000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf37 1.81032878247 0 83.639999 1.680001500000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf38 1.72558649193 0 83.619995 1.7100075000000032 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf39 1.81032878247 0 84.400002 0.5399970000000067 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf40 1.72558649193 0 85.020004 0.139996000000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf41 1.80156379054 0 83.720009 1.5599865000000008 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf42 1.79160865678 0 83.5 1.8900000000000077 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf43 1.67284410055 0 84.040001 1.079998500000002 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf44 1.73481954522 0 85.540001 -0.3800009999999986 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf45 1.79160865678 0 84.319992 0.6600120000000089 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf46 1.61991030088 0 83.580002 1.7699970000000178 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv perf 25 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv perf 25 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf47 1.61991030088 0 83.860001 1.3499985000000123 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv perf 28 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv perf 28 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf48 1.79160865678 0 84.18 0.8699999999999974 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf49 1.71762107501 0 85.540001 -0.3800009999999986 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf50 1.72558649193 0 84.720001 0.4399990000000088 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf51 1.82049363128 0 83.879997 1.3200045000000031 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf52 1.82049363128 0 84.12001 0.9599850000000174 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf53 1.72558649193 0 83.860001 1.3499985000000123 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf54 1.73481954522 0 85.419998 -0.2599980000000016 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf55 1.7523659141 0 84.759995 0.4000050000000016 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf56 1.65684682663 0 84.800003 0.35999700000000134 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 24 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf57 1.88313156795 0 83.720001 1.5599985000000132 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf58 1.82049363128 0 84.060005 1.0499925000000019 -1 gpu conv perf 24 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf59 1.82049363128 0 84.239998 0.7800030000000078 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf60 1.7523659141 0 84.379997 0.5700045000000031 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf61 1.82049363128 0 84.120003 0.9599955000000122 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf62 1.70856970404 0 83.900002 1.2899970000000067 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf63 1.79160865678 0 83.559998 1.800003000000018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf64 1.71762107501 0 85.040001 0.11999900000000141 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf65 1.81032878247 0 83.740005 1.529992500000013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf66 1.73481954522 0 84.719994 0.44000600000000534 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf67 1.70856970404 0 84.18 0.8699999999999974 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf68 1.72558649193 0 84.18 0.8699999999999974 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf69 1.61991030088 0 83.480003 1.919995500000013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv perf 25 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv perf 25 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf70 1.82049363128 0 83.860001 1.3499985000000123 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 24 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf71 1.58558171041 0 85.639999 -0.47999899999999796 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf72 1.88313156795 0 83.639999 1.680001500000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf73 1.63288729942 0 85.800003 -0.6400029999999987 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf74 1.73481954522 0 83.680008 1.6199880000000064 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf75 1.81032878247 0 83.939995 1.2300075000000135 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf76 1.67284410055 0 83.639999 1.680001500000003 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf77 1.7523659141 0 83.599998 1.7400030000000086 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf78 1.61991030088 0 84.119995 0.9600075000000032 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf79 1.81032878247 0 84.259995 0.7500075000000024 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf80 1.71762107501 0 84.860001 0.29999900000000823 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf81 1.81032878247 0 84.080002 1.0199970000000178 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf82 1.67284410055 0 84.680008 0.4799920000000043 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf83 1.82049363128 0 84.199997 0.8400045000000134 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf84 1.82049363128 0 83.959999 1.2000015000000133 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf85 1.88313156795 0 84.120003 0.9599955000000122 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf86 1.61237267544 0 85.340004 -0.18000399999998817 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf87 1.73481954522 0 83.919998 1.2600029999999975 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf88 1.67284410055 0 84.600006 0.5599940000000118 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf89 1.72558649193 0 84.400002 0.5399970000000067 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf90 1.66425732228 0 85.800003 -0.6400029999999987 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf91 1.74294565288 0 84.559998 0.600002000000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf92 1.82049363128 0 84.440002 0.47999699999999734 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf93 1.71762107501 0 84.360001 0.5999985000000123 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf94 1.81032878247 0 84.240005 0.779992500000013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf95 1.66425732228 0 83.619995 1.7100075000000032 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf96 1.88313156795 0 83.479996 1.9200060000000079 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf97 1.77662432349 0 83.620003 1.7099955000000122 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf98 1.64115261583 0 86.300003 -1.1400029999999988 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf99 1.80156379054 0 84.239998 0.7800030000000078 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf100 1.79160865678 0 84.099998 0.9900030000000086 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf101 1.70856970404 0 85.699997 -0.5399969999999911 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf102 1.7669421638 0 83.280006 2.2199910000000074 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf103 1.70269284588 0 83.780006 1.4699910000000074 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf104 1.7523659141 0 83.620003 1.7099955000000122 -1 gpu conv perf 24 add fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf105 1.82049363128 0 82.980003 2.669995500000013 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf106 1.80156379054 0 83.259995 2.2500075000000024 -1 gpu conv perf 24 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf107 1.65684682663 0 83.300003 2.189995500000002 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf108 1.65684682663 0 82.940002 2.7299969999999973 -1 gpu conv perf 24 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf109 1.7523659141 0 83.820007 1.4099895000000018 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf110 1.83623037965 0 83.68 1.6199999999999974 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf111 1.83623037965 0 83.759995 1.5000075000000024 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf112 1.83623037965 0 82.959999 2.7000015000000133 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 -6 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 +6 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf113 1.74910416379 0 83.099991 2.4900135000000034 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf114 1.82049363128 0 83.440002 1.9799969999999973 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf115 1.7523659141 0 82.800003 2.939995500000002 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf116 1.83623037965 0 83.080002 2.519997000000018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf117 1.80156379054 0 83.059998 2.550003000000018 -1 gpu conv perf 24 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf118 1.82049363128 0 83.699997 1.5900045000000134 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf119 1.7669421638 0 83.360001 2.0999985000000123 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf120 1.7669421638 0 84.219994 0.810009000000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf121 1.7523659141 0 83.680008 1.6199880000000064 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf122 1.82049363128 0 83.400002 2.0399970000000067 -1 gpu conv perf 30 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf123 1.93249146701 0 82.860001 2.8499985000000123 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf124 1.93249146701 0 83.12001 2.4599850000000174 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf125 1.65684682663 0 84.68 0.4799999999999983 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf126 1.80156379054 0 83.040001 2.579998500000002 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf127 1.65684682663 0 84.639999 0.520001000000002 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf128 1.7523659141 0 82.860001 2.8499985000000123 -1 gpu conv perf 24 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf129 1.69436712239 0 83.360001 2.0999985000000123 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf130 1.80156379054 0 83.0 2.6400000000000077 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 36 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf131 1.7669421638 0 83.139999 2.430001500000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf132 1.68612242394 0 83.820007 1.4099895000000018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf133 1.70269284588 0 84.099998 0.9900030000000086 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf134 1.73481954522 0 83.0 2.6400000000000077 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf135 1.82049363128 0 83.840004 1.3799940000000177 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf136 1.7523659141 0 83.580002 1.7699970000000178 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf137 1.82049363128 0 83.940002 1.2299969999999973 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf138 1.73481954522 0 83.360001 2.0999985000000123 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv samp 34 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf139 1.73481954522 0 84.139999 0.930001500000003 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf140 1.85589972143 0 83.0 2.6400000000000077 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf141 1.7523659141 0 83.360001 2.0999985000000123 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf142 1.66987141658 0 84.759995 0.4000050000000016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf143 1.7523659141 0 83.199997 2.3400045000000134 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv perf 24 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf144 1.72558649193 0 84.82 0.34000000000001196 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf145 1.73481954522 0 83.719994 1.560009000000008 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf146 1.7669421638 0 83.159996 2.4000059999999976 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf147 1.7669421638 0 83.060005 2.549992500000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf148 1.93249146701 0 83.139999 2.430001500000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf149 1.73481954522 0 83.68 1.6199999999999974 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf150 1.74910416379 0 83.300003 2.189995500000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf151 1.74910416379 0 83.780006 1.4699910000000074 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf152 1.73481954522 0 83.460007 1.949989500000001 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf153 1.88313156795 0 84.120003 0.9599955000000122 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf154 1.82049363128 0 83.400002 2.0399970000000067 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv perf 24 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf155 1.88313156795 0 83.099998 2.4900030000000086 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf156 1.82049363128 0 83.400002 2.0399970000000067 -1 gpu conv perf 24 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf157 1.81032878247 0 84.680008 0.4799920000000043 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf158 1.7669421638 0 83.840004 1.3799940000000177 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf159 1.71762107501 0 84.699997 0.46000300000000893 -1 gpu conv perf 25 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf160 1.93249146701 0 82.979996 2.670006000000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_pareto_confs_batch220.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_pareto_confs_batch220.txt index 5fc6f3d466..15e34ccf0d 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_pareto_confs_batch220.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_pareto_confs_batch220.txt @@ -1,473 +1,473 @@ +++++ conf1 1 0 84.76 0 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 +1 gpu conv fp32 1 add fp32 1 tanh fp32 1 +2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +3 gpu conv fp32 1 add fp32 1 tanh fp32 1 +4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +5 gpu conv fp32 1 add fp32 1 tanh fp32 1 +6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +7 gpu mul fp32 1 add fp32 1 8 gpu softmax fp32 1 ----- +++++ conf1 1.9038241963 0 84.979996 0.18000400000000527 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf2 1.79160865678 0 85.259995 -0.09999499999999839 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf3 1.9038241963 0 84.900002 0.2599980000000045 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf4 1.88313156795 0 84.159996 0.9000059999999976 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf5 1.70856970404 0 85.699997 -0.5399969999999911 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf6 1.70856970404 0 85.479996 -0.31999599999999473 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf7 1.9038241963 0 84.82 0.34000000000001196 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf8 1.81032878247 0 84.400002 0.5399970000000067 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf9 1.72558649193 0 85.020004 0.139996000000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf10 1.73481954522 0 85.540001 -0.3800009999999986 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf11 1.79160865678 0 84.319992 0.6600120000000089 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf12 1.71762107501 0 85.540001 -0.3800009999999986 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf13 1.72558649193 0 84.720001 0.4399990000000088 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf14 1.73481954522 0 85.419998 -0.2599980000000016 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf15 1.7523659141 0 84.759995 0.4000050000000016 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf16 1.88313156795 0 83.720001 1.5599985000000132 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf17 1.82049363128 0 84.239998 0.7800030000000078 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf18 1.71762107501 0 85.040001 0.11999900000000141 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf19 1.73481954522 0 84.719994 0.44000600000000534 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf20 1.88313156795 0 83.639999 1.680001500000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf21 1.63288729942 0 85.800003 -0.6400029999999987 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf22 1.81032878247 0 84.259995 0.7500075000000024 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf23 1.71762107501 0 84.860001 0.29999900000000823 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf24 1.88313156795 0 84.120003 0.9599955000000122 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf25 1.66425732228 0 85.800003 -0.6400029999999987 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf26 1.74294565288 0 84.559998 0.600002000000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf27 1.82049363128 0 84.440002 0.47999699999999734 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf28 1.81032878247 0 84.240005 0.779992500000013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf29 1.88313156795 0 83.479996 1.9200060000000079 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf30 1.64115261583 0 86.300003 -1.1400029999999988 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf31 1.80156379054 0 84.239998 0.7800030000000078 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf32 1.70856970404 0 85.699997 -0.5399969999999911 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf33 1.83623037965 0 83.68 1.6199999999999974 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf34 1.83623037965 0 83.759995 1.5000075000000024 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf35 1.93249146701 0 82.860001 2.8499985000000123 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf36 1.93249146701 0 83.12001 2.4599850000000174 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf37 1.72558649193 0 84.82 0.34000000000001196 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf38 1.93249146701 0 83.139999 2.430001500000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf39 1.88313156795 0 84.120003 0.9599955000000122 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf40 1.88313156795 0 83.099998 2.4900030000000086 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf41 1.81032878247 0 84.680008 0.4799920000000043 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf42 1.93249146701 0 82.979996 2.670006000000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_promise_confs_batch220_multi.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_promise_confs_batch220_multi.txt index 7a674793e1..8a3147cd5c 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_promise_confs_batch220_multi.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_promise_confs_batch220_multi.txt @@ -1,17281 +1,17281 @@ +++++ conf1 1 0 84.76 0 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 +1 gpu conv fp32 1 add fp32 1 tanh fp32 1 +2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +3 gpu conv fp32 1 add fp32 1 tanh fp32 1 +4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +5 gpu conv fp32 1 add fp32 1 tanh fp32 1 +6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +7 gpu mul fp32 1 add fp32 1 8 gpu softmax fp32 1 ----- +++++ conf1 2.75602585333 0 85.603499975 -0.14349997500000028 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf2 1.97124355876 0 84.40650025 0.5302496250000175 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf3 2.42513277215 0 84.38499995 0.5625000750000169 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf4 3.15731364232 0 84.32650055 0.650249174999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf5 2.15883788221 0 84.71599955 0.44400044999999866 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf6 2.44524097268 0 84.679500025 0.4804999750000093 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf7 2.65671333449 0 84.833500925 0.3264990750000095 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf8 2.75602585333 0 85.19850045 0.26149955000000774 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf9 2.40535258985 0 84.749999375 0.41000062500000356 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf10 3.61386264477 0 84.3579998 0.603000300000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf11 2.42513277215 0 84.584999875 0.5750001250000111 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf12 2.37819629574 0 84.6939999 0.4660001000000108 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf13 2.47811701105 0 84.370999875 0.5835001875000145 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf14 2.32202027362 0 84.536500575 0.623499425 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf15 3.44499877858 0 84.21249925 0.8212511250000176 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf16 2.71542297875 0 84.429999925 0.4950001125000014 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf17 2.93135224398 0 85.350500475 0.1094995250000011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf18 3.38956467165 0 84.52750015 0.6324998500000106 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf19 3.51061454803 0 84.459999475 0.4500007875000023 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf20 2.52912274255 0 84.24849965 0.7672505250000086 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf21 3.25488617683 0 84.588000525 0.5719994750000069 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf22 2.23579750603 0 84.972999475 0.18700052500001052 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf23 2.61896392029 0 84.6199998 0.5400002000000029 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf24 2.75602585333 0 84.448499375 0.4672509375000118 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf25 2.19091471805 0 85.341498375 0.11850162500000466 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf26 2.60048866322 0 85.50600055 -0.04600054999999087 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf27 3.55290898553 0 84.27399915 0.7290012750000159 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf28 2.60048866322 0 85.5190003 -0.05900029999999673 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf29 2.12156410089 0 85.249500075 0.21049992500000486 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf30 1.63288729942 0 85.540001 -0.0800009999999986 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf31 2.64243046275 0 84.747 0.41300000000000525 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf32 2.96673931882 0 84.3670011 0.5894983500000137 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf33 3.15731364232 0 84.5144993 0.645500700000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf34 2.93135224398 0 84.598999075 0.5610009250000104 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf35 2.12156410089 0 85.24549965 0.21450035000000583 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf36 2.52912274255 0 84.636998925 0.5230010750000048 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf37 2.76636700953 0 84.511999775 0.6480002249999984 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf38 3.48571779877 0 84.44449935 0.47325097500000624 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf39 3.02145143763 0 84.706499675 0.4535003250000017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf40 3.26208692053 0 84.479999525 0.6800004750000085 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf41 2.73050807996 0 85.514499575 -0.05449957499999697 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf42 3.17089287752 0 84.660999875 0.49900012500000346 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf43 2.90250153505 0 85.458999675 0.0010003250000039654 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf44 3.36634957954 0 84.552500325 0.6074996750000082 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf45 2.79256263316 0 84.668500675 0.49149932500000093 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf46 2.42912791142 0 84.414500625 0.5182490625000042 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf47 2.56430926229 0 84.79049965 0.36950035000000414 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf48 1.72558649193 0 84.899994 0.2600059999999985 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf49 3.35104871505 0 84.373500225 0.5797496625000065 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf50 2.03940354341 0 85.71549965 -0.25549964999999303 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf51 3.08174415116 0 84.29850005 0.6922499250000058 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf52 4.35642257412 0 84.669500075 0.49049992500000317 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf53 2.93433101084 0 85.609000775 -0.14900077499999326 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf54 3.48571779877 0 84.803999275 0.3560007250000098 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf55 2.70079305483 0 83.907500325 1.2787495125000063 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf56 3.42944293235 0 84.067999375 1.0380009375000085 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf57 2.75369879845 0 85.771000375 -0.3110003749999947 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf58 4.68889675944 0 83.9459997 1.2210004500000053 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf59 5.29506075557 0 84.0784999 1.022250150000012 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf60 4.38821376777 0 83.696001175 1.5959982374999981 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf61 2.93433101084 0 85.736000075 -0.27600007500000173 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf62 4.14169933128 0 83.947499675 1.218750487500003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf63 4.32387438839 0 83.771999775 1.4820003375000113 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf64 3.61386264477 0 84.3340002 0.6389996999999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf65 3.42944293235 0 85.239499575 0.22050042500000872 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf66 4.40131209873 0 83.8969994 1.2945009000000098 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf67 3.97425458366 0 83.9985008 1.1422488000000044 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf68 2.2528774348 0 84.350000025 0.6149999625000078 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf69 1.81032878247 0 83.699997 1.5900045000000134 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf70 4.06183643479 0 83.567500275 1.788749587500007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf71 4.749306487 0 84.390500075 0.5542498874999993 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf72 2.28075482883 0 84.5375 0.6225000000000108 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf73 4.17689583184 0 83.91850015 1.262249775000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf74 4.55072975326 0 83.6869994 1.6095009000000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf75 2.16999135568 0 85.843501325 -0.3835013250000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf76 3.88013266186 0 83.785500925 1.461748612500017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf77 4.60761017185 0 83.649500225 1.665749662500012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf78 3.51899268644 0 85.6135004 -0.15350040000000148 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf79 4.94025067672 0 83.966500175 1.190249737500018 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf80 2.95809675652 0 85.6330001 -0.1730000999999987 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf81 4.00665156404 0 83.8790009 1.3214986500000165 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf82 3.38956467165 0 85.33500065 0.12499935000000734 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf83 4.33659088676 0 83.988001225 1.1579981625000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf84 3.26931959511 0 84.842999475 0.317000525000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf85 2.30571969084 0 83.461999975 1.9470000375000112 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf86 2.7508842139 0 84.106000125 0.9809998125000163 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf87 3.370196653 0 83.968499525 1.1872507125000098 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf88 4.94025067672 0 84.25399945 0.7590008250000153 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf89 3.94816064972 0 84.3739996 0.5790006000000005 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf90 3.52359981675 0 85.302000575 0.15799942500001124 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf91 3.05595874161 0 85.42199975 0.03800025000000745 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf92 2.15883788221 0 84.2609997 0.7485004500000088 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf93 4.48904909382 0 83.8949996 1.2975005999999993 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf94 3.05595874161 0 85.22250005 0.2374999500000115 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf95 3.22328457615 0 85.391500875 0.0684991249999996 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf96 3.65869733242 0 83.628499675 1.697250487500007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf97 3.13050112438 0 84.350000025 0.6149999625000078 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf98 4.42774475186 0 83.944500275 1.223249587500014 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf99 2.29874094265 0 84.200499275 0.8392510875000099 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf100 4.34358967411 0 84.47900025 0.6809997500000066 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf101 2.98510874485 0 84.6229994 0.5370006000000075 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf102 3.11726491618 0 85.597999975 -0.13799997499998823 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf103 3.09443798634 0 85.223499675 0.23650032500000578 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf104 2.33141358263 0 84.844499025 0.3155009750000005 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf105 2.50340530846 0 84.1550001 0.9074998500000149 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf106 5.24794714078 0 84.0970001 0.994499850000004 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf107 3.19188638661 0 83.9204998 1.259250300000005 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf108 4.78711530883 0 83.746501 1.5202485000000152 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf109 3.38956467165 0 83.95250035 1.2112494750000167 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf110 2.49720367723 0 83.921998775 1.257001837499999 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf111 4.1533654053 0 83.1165007 2.465248950000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf112 3.0122585054 0 84.66600095 0.4939990500000079 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 5 promise swing_level 7 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf113 3.57011348802 0 83.949000875 1.2164986875000139 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf114 4.00665156404 0 84.172499475 0.8812507875000151 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf115 3.08465896376 0 82.97749955 2.673750675000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf116 4.17689583184 0 83.724500075 1.5532498875000158 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf117 4.36224960626 0 82.850499925 2.8642501125000166 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf118 4.03958106713 0 84.96750085 0.19249915000001183 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 5 promise swing_level 7 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf119 2.28075482883 0 82.8405001 2.879249850000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf120 4.3751931669 0 83.410500125 2.0242498125000097 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf121 4.53672840544 0 83.13549935 2.4367509750000025 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf122 3.10094560281 0 83.877000975 1.3244985375000056 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf123 2.81925910919 0 83.148499875 2.4172501875000094 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf124 4.22476570574 0 83.5195001 1.8607498500000048 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf125 3.84974163103 0 84.2115002 0.8227497000000028 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf126 3.43305296381 0 83.175499875 2.376750187500008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf127 3.04334411031 0 83.611999925 1.7220001124999982 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf128 3.93186488986 0 83.7040003 1.5839995500000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf129 4.85009935161 0 83.5490004 1.816499400000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf130 4.3751931669 0 83.993998825 1.1490017624999993 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf131 4.14752416478 0 85.14899995 0.01100005000000126 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 4 5 promise swing_level 7 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf132 4.38821376777 0 83.797500275 1.443749587500001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf133 3.27658441341 0 84.544500125 0.6154998750000061 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf134 4.03958106713 0 85.25950035 0.20049965000000897 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 5 promise swing_level 7 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf135 3.59623494226 0 83.939500225 1.2307496625000027 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf136 4.44108049557 0 83.8625007 1.3462489500000103 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf137 2.56430926229 0 84.193999325 0.8490010125000182 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf138 3.75178902153 0 83.339999975 2.130000037500011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf139 3.70465846079 0 84.716999475 0.44300052500001075 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf140 2.70545850471 0 83.519999375 1.8600009375000113 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf141 3.17089287752 0 83.236999625 2.284500562500014 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf142 3.61386264477 0 85.143000275 0.016999724999999466 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 5 promise swing_level 7 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf143 4.71064890814 0 83.529500325 1.8457495125000065 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf144 4.47477396827 0 83.492000225 1.9019996625000104 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf145 4.53672840544 0 84.051999675 1.0620004875000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf146 3.31339824504 0 84.7090006 0.45099940000000915 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf147 2.82736787184 0 84.600000125 0.5599998750000111 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf148 2.74581611742 0 83.193 2.350500000000011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf149 2.87421319527 0 83.2785002 2.222249700000013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf150 4.42774475186 0 83.477500925 1.923748612500006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf151 2.30388008076 0 84.286499575 0.7102506375000175 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf152 3.59185485404 0 84.2370006 0.784499100000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf153 2.38015655098 0 84.838500775 0.32149922500000516 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf154 4.91476285223 0 83.475999725 1.9260004125000165 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf155 4.21269566507 0 83.087499475 2.5087507875000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf156 2.22395915051 0 83.238499725 2.2822504125000123 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf157 4.75623276948 0 83.1924999 2.3512501500000056 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf158 4.8341984727 0 83.758499325 1.502251012500004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf159 3.70465846079 0 83.383499375 2.0647509375000084 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf160 3.15731364232 0 84.30150075 0.687748875000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf161 2.3293565891 0 84.392 0.5520000000000138 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf162 2.2528774348 0 84.997000175 0.16299982500000854 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf163 2.71542297875 0 85.508499875 -0.04849987499999314 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf164 3.15731364232 0 84.4710007 0.6889993000000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf165 2.90250153505 0 85.274500125 0.18549987500000215 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf166 2.93135224398 0 85.342000525 0.11799947500000202 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf167 2.52912274255 0 84.337999375 0.6330009375000145 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf168 2.14786504266 0 84.48350045 0.6764995500000112 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf169 2.21727076111 0 84.595499825 0.5645001750000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf170 2.2528774348 0 85.041 0.11900000000000832 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf171 3.25488617683 0 84.558999125 0.6010008750000054 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf172 2.40535258985 0 84.651500075 0.5084999250000038 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf173 2.37819629574 0 84.377000025 0.5744999625000062 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf174 3.32835645011 0 84.435499525 0.4867507125000117 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf175 2.35917135957 0 84.716500425 0.4434995749999985 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf176 2.50761735877 0 84.535999925 0.6240000750000064 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf177 2.42513277215 0 84.369499725 0.5857504125000119 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf178 3.61386264477 0 84.274499475 0.7282507875000093 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf179 2.25801563131 0 84.589000125 0.5709998750000068 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf180 2.35917135957 0 84.480499875 0.6795001249999985 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf181 2.81925910919 0 85.573499275 -0.11349927499999807 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf182 2.52267840919 0 84.341001025 0.6284984625000121 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf183 3.17089287752 0 84.6909999 0.4690001000000109 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf184 2.75602585333 0 84.700500525 0.45949947500000976 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf185 2.57325940549 0 84.8150002 0.3449998000000051 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf186 2.90250153505 0 84.423999575 0.5040006375000132 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf187 2.66632133899 0 85.6425003 -0.18250029999998957 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf188 2.09298780883 0 84.61899955 0.5410004500000071 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf189 1.8711830795 0 84.5810003 0.5789997000000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf190 2.21727076111 0 84.653999475 0.5060005249999989 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf191 2.40535258985 0 84.786999975 0.3730000250000046 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf192 2.93135224398 0 85.291999425 0.16800057500000493 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf193 1.95815465425 0 85.651500225 -0.19150022500000147 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf194 3.36634957954 0 84.494499625 0.665500375000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf195 3.40522010623 0 84.375498825 0.5767517625000167 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf196 2.66632133899 0 84.538500425 0.62149957500001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf197 2.50128201583 0 84.1665 0.8902500000000089 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf198 2.27895482379 0 84.610999625 0.549000375 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf199 2.81925910919 0 84.495000675 0.6649993250000051 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf200 3.29857388792 0 84.6065009 0.5534991000000048 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf201 2.61896392029 0 84.7069994 0.4530006000000043 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf202 3.04014002515 0 84.4635001 0.6964999000000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf203 3.06572586948 0 84.7455009 0.414499100000009 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf204 3.93186488986 0 84.356500625 0.6052490625000146 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf205 4.94774553187 0 83.768500225 1.4872496625000124 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf206 3.84520265677 0 84.290999425 0.7035008625000145 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf207 2.61205424073 0 84.867499325 0.292500675000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf208 2.66414324327 0 86.032000175 -0.5720001749999881 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf209 3.69074932962 0 84.219500725 0.8107489125000029 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf210 4.06237215081 0 85.0309998 0.12900020000000156 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf211 3.31339824504 0 83.93949965 1.2307505250000048 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf212 3.26931959511 0 85.4195011 0.04049890000000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf213 2.27916093093 0 83.951499525 1.2127507125000037 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf214 3.03418372506 0 84.44249975 0.47625037500001355 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf215 3.35149437292 0 84.049999625 1.0650005625000105 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf216 2.97300733121 0 83.60550005 1.7317499250000026 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf217 3.52741090964 0 85.505999525 -0.04599952499998777 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf218 3.63209225801 0 83.57750015 1.7737497749999989 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf219 2.32202027362 0 85.46449985 -0.0044998499999905905 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf220 4.20126736815 0 84.3879992 0.5580012000000139 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf221 2.32202027362 0 85.6529999 -0.1929998999999924 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf222 3.51519975253 0 83.630000325 1.6949995125000115 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf223 4.05678387799 0 85.077499975 0.08250002500001019 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf224 3.60986255985 0 85.31750015 0.14249985000000437 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf225 3.03730870211 0 83.63900065 1.681499025000008 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf226 5.15619113861 0 84.271500925 0.7327486125000107 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf227 2.73835708776 0 85.30150055 0.15849945000000504 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf228 4.29300531776 0 84.03700005 1.084499925000003 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf229 4.81915540131 0 84.11099975 0.9735003750000004 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf230 2.89679940787 0 83.536999925 1.8345001125000024 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf231 4.20126736815 0 84.871999175 0.288000825000006 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf232 4.29300531776 0 84.8200001 0.3399999000000037 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf233 5.06758855666 0 84.5160001 0.6439999000000057 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf234 2.81925910919 0 83.753498675 1.5097519875000032 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf235 4.46799443574 0 84.1489996 0.9165006000000133 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf236 4.29300531776 0 84.8124996 0.3475004000000098 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf237 2.8439887133 0 83.401999875 2.0370001875000057 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf238 3.1208796497 0 85.50700015 -0.04700014999999097 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf239 4.81915540131 0 84.649000525 0.5109994749999999 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf240 4.63728501684 0 84.728500125 0.43149987500000864 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf241 4.71136945196 0 84.487998925 0.6720010750000057 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf242 4.38883904114 0 84.81199985 0.3480001499999986 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf243 2.61686247873 0 83.9070001 1.2794998500000005 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf244 2.76636700953 0 84.293000275 0.700499587500012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf245 2.02960847562 0 84.04700125 1.0694981250000168 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf246 3.37064741875 0 84.3115009 0.6727486500000097 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf247 3.38956467165 0 85.288499275 0.17150072499999852 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf248 4.35642257412 0 84.583000325 0.5769996750000047 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf249 3.3138339442 0 85.486001025 -0.026001024999987965 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf250 3.27701048398 0 83.59450055 1.748249174999998 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf251 3.25488617683 0 84.555500025 0.6044999750000045 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf252 4.63728501684 0 84.09699955 0.9945006749999976 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf253 5.07547506828 0 83.76399915 1.4940012750000022 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf254 2.32388897879 0 84.887499975 0.2725000250000079 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf255 2.89145135544 0 85.8500006 -0.3900005999999962 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf256 4.94025067672 0 84.158499675 0.9022504875000052 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf257 2.76636700953 0 84.41199975 0.5220003749999975 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf258 3.31003535109 0 85.418500075 0.04149992500000793 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf259 3.09443798634 0 85.576000175 -0.1160001749999992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf260 2.22923500612 0 84.67999975 0.4800002500000119 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf261 3.35149437292 0 84.19099985 0.8535002250000119 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf262 5.02441943023 0 84.25649955 0.7552506750000063 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf263 3.31339824504 0 85.33299995 0.12700005000000375 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf264 3.69537408729 0 85.2349998 0.22500020000000803 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 5 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf265 3.88013266186 0 84.294999275 0.6975010875000152 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf266 4.1533654053 0 82.951 2.7135000000000176 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf267 2.89679940787 0 82.9884999 2.657250150000017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf268 4.39475317355 0 83.83999995 1.380000074999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf269 2.74576172323 0 83.461999475 1.9470007875000093 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf270 5.40080120652 0 82.863499425 2.844750862500007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf271 2.89111964106 0 85.57900005 -0.11900004999999964 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf272 3.75178902153 0 84.894499425 0.2655005749999987 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 6 5 promise swing_level 6 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf273 4.71064890814 0 83.8385005 1.3822492500000152 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf274 4.39475317355 0 83.992499725 1.151250412500005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf275 4.29866382416 0 83.3029991 2.185501350000017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf276 3.64063062067 0 84.453499975 0.4597500375000081 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf277 5.28466361659 0 82.879000675 2.821498987500007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf278 4.68074200425 0 83.704000075 1.5839998875000134 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf279 4.09568311293 0 83.305500125 2.1817498125000157 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf280 4.08433838508 0 83.359500075 2.100749887500008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf281 3.32835645011 0 84.2059993 0.8310010500000047 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf282 2.9489396246 0 82.913999825 2.7690002625000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf283 4.40131209873 0 83.8754995 1.3267507500000022 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf284 4.10709103894 0 83.194000425 2.3489993625000096 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf285 3.8699491435 0 83.5369993 1.83450105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf286 3.70010922218 0 83.02999955 2.5950006750000085 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 6 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf287 2.91115808335 0 82.77200055 2.9819991750000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf288 3.99579404276 0 83.80400105 1.4339984250000128 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf289 4.48157410599 0 82.996000025 2.6459999625000066 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf290 4.29866382416 0 83.480500575 1.9192491375000174 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf291 3.33219723454 0 83.320000375 2.159999437499998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf292 3.9635717019 0 85.159999775 2.25000002296305e-07 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf293 2.19094934992 0 84.789500625 0.37049937500000285 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 5 promise swing_level 6 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf294 5.1195298246 0 82.923000125 2.7554998125000054 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf295 4.39475317355 0 83.503499775 1.8847503375000159 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf296 3.48984270518 0 84.316501225 0.6652481625000135 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf297 5.06675494506 0 82.949500225 2.7157496625000164 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf298 2.73050807996 0 83.426499675 2.0002504875000042 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf299 4.46799443574 0 83.66850015 1.637249775000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf300 4.60761017185 0 82.818499225 2.912251162500013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 5 5 promise swing_level 7 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf301 4.32387438839 0 82.778499525 2.9722507125000064 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 4 promise swing_level 7 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf302 4.08433838508 0 85.01950015 0.1404998500000062 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf303 4.06183643479 0 82.95150055 2.712749174999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf304 4.53672840544 0 83.8220002 1.4069997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf305 2.84647094048 0 85.43150025 0.028499750000005986 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf306 5.04077343699 0 82.977500125 2.6737498124999988 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf307 3.9635717019 0 82.8584995 2.8522507500000174 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf308 3.61839106317 0 82.968501275 2.6872480875000164 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf309 4.22476570574 0 83.063999525 2.544000712500008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf310 4.29866382416 0 83.97550025 1.1767496250000136 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf311 2.88270158233 0 84.071500325 1.0327495125000041 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf312 3.91100734284 0 83.092499525 2.501250712500017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf313 4.3751931669 0 83.9080002 1.2779997000000023 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf314 4.3751931669 0 82.922000325 2.756999512500009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf315 2.84098663088 0 82.856499725 2.8552504124999984 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf316 2.2528774348 0 84.9994999 0.1605001000000016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf317 3.17089287752 0 84.63199955 0.5280004500000018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf318 2.60283267947 0 84.2664995 0.7402507500000155 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf319 2.32202027362 0 84.53199975 0.628000250000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf320 3.13716146983 0 84.20149995 0.8377500750000095 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf321 2.520476477 0 84.890501025 0.2694989749999991 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf322 3.36634957954 0 84.530499175 0.6295008250000024 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf323 2.75602585333 0 85.24450055 0.2154994500000072 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf324 2.73050807996 0 85.327499775 0.1325002249999983 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf325 2.57325940549 0 85.38850005 0.07149995000000048 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf326 2.23579750603 0 84.981499925 0.17850007500001086 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf327 1.70856970404 0 85.779999 -0.31999899999999853 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf328 3.35104871505 0 84.346000075 0.6209998874999982 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf329 3.15731364232 0 84.38100045 0.5684993250000048 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf330 2.87421319527 0 85.384499975 0.07550002500000802 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf331 2.73050807996 0 84.6715004 0.4884996000000058 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf332 3.24057963994 0 84.39200025 0.5519996250000148 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf333 3.25488617683 0 84.634000375 0.5259996250000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf334 2.73050807996 0 85.609499625 -0.14949962499999286 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf335 2.09742183942 0 85.229000225 0.23099977500001218 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf336 3.0527280537 0 84.765500125 0.3944998750000025 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf337 2.12156410089 0 85.1930007 0.2669993000000062 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf338 2.90250153505 0 85.44900055 0.01099945000001129 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf339 2.37819629574 0 84.46199975 0.6980002500000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf340 2.22395915051 0 84.951000175 0.2089998250000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf341 3.04014002515 0 84.3760006 0.5759991000000113 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf342 2.44524097268 0 85.568500075 -0.10850007499999775 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf343 2.27895482379 0 84.6024996 0.5575004000000036 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf344 1.95815465425 0 85.534000025 -0.07400002499999231 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf345 2.90250153505 0 85.49499935 -0.03499934999999538 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf346 3.17089287752 0 84.686 0.4739999999999981 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf347 2.81925910919 0 85.59749985 -0.13749985000000037 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf348 3.29857388792 0 84.630999275 0.5290007250000116 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf349 3.26208692053 0 84.315000225 0.6674996624999991 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf350 2.03940354341 0 85.43599915 0.02400085000000446 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf351 2.31110203954 0 84.442999825 0.475500262500006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf352 3.46931522498 0 84.3325001 0.6412498500000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf353 2.37819629574 0 84.4335003 0.4897495499999991 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf354 2.57325940549 0 85.4859998 -0.025999799999996742 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf355 3.38956467165 0 84.546000025 0.6139999750000072 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf356 3.51061454803 0 84.455499375 0.4567509375000043 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf357 2.79256263316 0 84.673500175 0.4864998250000042 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf358 2.81925910919 0 84.470000625 0.6899993750000079 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf359 2.60048866322 0 85.495500325 -0.03550032499998962 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf360 4.94025067672 0 84.404999875 0.5325001875000055 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf361 4.67403368929 0 84.088000125 1.0079998125000174 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf362 3.47749717017 0 85.815501 -0.35550099999999246 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf363 4.85885544059 0 84.292499175 0.7012512375000028 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf364 4.68074200425 0 83.977499175 1.1737512374999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf365 4.78711530883 0 83.862999725 1.3455004125000158 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf366 4.0017352815 0 85.11050035 0.04949965000000989 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf367 4.36871179935 0 84.263999575 0.7440006375000081 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf368 3.13382775829 0 83.49650025 1.8952496250000124 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf369 3.69119162626 0 84.524999225 0.6350007750000032 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf370 2.73080396045 0 84.9065 0.25350000000001105 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf371 4.57189457086 0 84.15749935 0.9037509750000154 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf372 3.59185485404 0 85.0480001 0.11199990000000926 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf373 4.67403368929 0 84.4090002 0.5264997000000164 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf374 2.97571998859 0 83.81799985 1.4130002249999976 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf375 3.35149437292 0 83.5740013 1.7789980499999984 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf376 5.02441943023 0 84.47450005 0.685499950000002 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf377 4.78711530883 0 84.00499975 1.1325003750000135 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf378 3.35149437292 0 84.349999975 0.6150000375000033 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf379 3.10094560281 0 86.061499025 -0.6014990249999983 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf380 4.48904909382 0 84.381999625 0.5670005624999988 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf381 2.80851950068 0 83.815500875 1.4167486875000108 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf382 4.29300531776 0 84.627500175 0.5324998250000107 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf383 3.54863373783 0 85.767000175 -0.3070001750000017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf384 3.13755204997 0 83.70100015 1.5884997750000096 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf385 2.54900634755 0 84.051000425 1.0634993625000106 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf386 3.28789331155 0 85.129000325 0.030999674999998228 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf387 5.11150591832 0 84.26499995 0.7425000750000024 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf388 1.87599501422 0 84.58149945 0.5785005500000097 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf389 4.33659088676 0 83.977500975 1.1737485375000105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf390 4.8661051796 0 83.88949965 1.3057505250000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf391 3.38604547437 0 85.566500875 -0.10650087499999755 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf392 2.96078226017 0 85.70449975 -0.2444997499999914 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf393 5.03217204616 0 83.6715 1.6327500000000157 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf394 3.15056754597 0 84.98200025 0.17799975000000645 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf395 4.00665156404 0 83.9340005 1.2389992500000133 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 5 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf396 3.39002063361 0 85.293999675 0.16600032500001022 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf397 3.21935410129 0 84.070499775 1.034250337500005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf398 4.85885544059 0 84.65649925 0.5035007500000092 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf399 4.70385465798 0 84.5609999 0.5990001000000064 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf400 3.44499877858 0 85.4609991 -0.000999099999989983 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf401 4.55072975326 0 84.080499875 1.0192501875000062 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf402 2.81925910919 0 83.83000085 1.3949987250000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf403 4.33022330151 0 84.1734993 0.879751050000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf404 2.65699343733 0 83.763500225 1.4947496625000056 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf405 5.20166453319 0 84.033999425 1.0890008625000007 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf406 3.25164094825 0 83.796001025 1.4459984625000146 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf407 4.48904909382 0 84.65949995 0.500500050000008 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf408 4.23690511031 0 84.1950004 0.8474994000000109 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf409 2.21401235836 0 85.0235005 0.13649950000000788 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf410 3.33596660148 0 85.36200025 0.09799975000001099 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf411 3.15056754597 0 83.512500525 1.8712492125000182 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf412 4.0900028821 0 83.86599955 1.3410006750000107 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf413 4.74857429339 0 84.053999725 1.059000412500012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf414 3.25488617683 0 83.933500225 1.239749662500003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf415 2.80851950068 0 83.87650005 1.3252499250000014 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf416 3.70465846079 0 84.4189996 0.511500599999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf417 4.67403368929 0 84.3789996 0.5715006000000074 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf418 3.05595874161 0 84.039000725 1.0814989125000167 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf419 3.59185485404 0 85.8430001 -0.38300009999999246 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf420 4.06183643479 0 84.1495005 0.9157492500000046 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf421 2.69556689425 0 84.286999325 0.709501012500013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf422 3.3549420805 0 85.782500025 -0.32250002499999936 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf423 2.45955992282 0 84.842500325 0.317499675000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf424 4.49523657385 0 83.92400015 1.2539997750000111 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf425 4.48904909382 0 83.901000575 1.2884991375000112 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf426 3.39354810299 0 85.559999275 -0.09999927499999045 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf427 4.94025067672 0 84.243499175 0.7747512375000127 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf428 4.56481779115 0 84.1150009 0.9674986500000102 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf429 4.17689583184 0 84.253999075 0.7590013875000139 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf430 3.61386264477 0 85.844500175 -0.38450017499998806 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf431 3.08174415116 0 85.580500375 -0.12050037499999461 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf432 4.99805803481 0 83.592999425 1.7505008625000045 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf433 4.03958106713 0 84.00849995 1.1272500750000063 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf434 3.02145143763 0 83.268000675 2.2379989875000135 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf435 4.49523657385 0 83.028999975 2.5965000375000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf436 4.27374553867 0 83.9910002 1.1534997000000047 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf437 4.36224960626 0 83.847999525 1.3680007124999989 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf438 2.71542297875 0 83.52849905 1.8472514250000174 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf439 2.47811701105 0 84.5134996 0.6465004000000022 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf440 4.23690511031 0 83.79150005 1.4527499250000133 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf441 3.29857388792 0 85.05850045 0.10149955000000832 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf442 5.20997422309 0 83.15000035 2.414999475000009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf443 5.50155459048 0 83.250999075 2.263501387500014 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf444 3.18458942266 0 84.93299905 0.22700094999999865 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf445 4.68074200425 0 83.61949965 1.710750525000016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf446 3.84974163103 0 84.69049935 0.46950065000000907 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf447 4.63658695371 0 83.7550003 1.5074995499999986 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf448 4.68074200425 0 83.94699955 1.2195006750000061 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf449 2.9489396246 0 83.6290004 1.6964994000000146 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf450 5.1643560615 0 83.562499475 1.7962507875000142 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf451 3.93186488986 0 83.49099985 1.9035002250000161 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf452 3.55728584143 0 82.924999 2.752501500000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf453 4.24300101117 0 83.38699945 2.0595008250000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf454 4.95606048827 0 83.69699935 1.5945009750000096 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf455 4.75623276948 0 83.516500825 1.8652487625000163 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf456 2.7586587916 0 83.8450002 1.3724997000000059 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf457 5.40080120652 0 83.35250035 2.111249475000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf458 3.44499877858 0 84.8129998 0.3470002000000051 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf459 3.75178902153 0 84.776000275 0.3839997250000039 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf460 3.55290898553 0 84.6360004 0.5239996000000048 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf461 5.30367177341 0 82.9259998 2.7510003000000083 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf462 4.56481779115 0 83.055499625 2.5567505625000138 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf463 5.20997422309 0 83.420999525 2.008500712500009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf464 4.45449681271 0 83.66950075 1.6357488750000115 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf465 2.520476477 0 84.838000625 0.32199937499999864 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf466 3.55290898553 0 85.03699985 0.1230001500000043 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf467 3.65869733242 0 85.583500025 -0.12350002500000129 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf468 4.45449681271 0 83.6295 1.695750000000018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf469 4.71064890814 0 83.543000025 1.825499962500011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf470 2.2189746076 0 83.33249935 2.1412509749999984 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf471 2.97872563985 0 83.459499975 1.9507500375000077 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf472 5.1195298246 0 83.2679997 2.2380004500000013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf473 4.91476285223 0 83.098000475 2.492999287499998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf474 4.68074200425 0 83.8485003 1.367249550000011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf475 4.27374553867 0 83.976999975 1.1745000375000103 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf476 3.57011348802 0 83.251999825 2.2620002625000097 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf477 4.27374553867 0 83.580000125 1.7699998125000107 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf478 4.01756825102 0 84.247500275 0.7687495875000181 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf479 4.8661051796 0 83.483000725 1.9154989125000128 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf480 2.3293565891 0 84.3574996 0.6037506000000121 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf481 3.73279362334 0 84.351000625 0.6134990625000114 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf482 5.20997422309 0 83.508000175 1.877999737499998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf483 4.71064890814 0 83.18400015 2.3639997750000035 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf484 2.97872563985 0 84.0844998 1.0132503000000028 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf485 4.99805803481 0 83.6575002 1.6537497000000059 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf486 4.68074200425 0 83.4239998 2.0040003000000013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf487 4.68074200425 0 83.921 1.258499999999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf488 5.60613859814 0 83.26149955 2.247750675000013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf489 3.65869733242 0 84.5534994 0.6065005999999983 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf490 3.61386264477 0 84.3454998 0.6217503000000093 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf491 2.54220394201 0 84.2759998 0.7260003000000168 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf492 2.69064816356 0 84.613499375 0.5465006250000016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf493 2.50761735877 0 84.60450015 0.5554998499999982 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf494 2.57325940549 0 84.82849985 0.3315001500000051 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf495 2.75602585333 0 85.552499775 -0.09249977499999601 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf496 2.90250153505 0 85.553999725 -0.09399972499999193 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf497 2.93135224398 0 85.3530005 0.10699950000001196 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf498 3.25488617683 0 84.6365001 0.5234998999999988 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf499 3.17089287752 0 84.72150055 0.43849945000000334 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf500 2.71542297875 0 84.4644997 0.6955003000000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf501 2.39753056999 0 84.811998925 0.34800107500000765 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf502 3.51061454803 0 84.41499995 0.5175000750000152 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf503 2.15883788221 0 85.138499675 0.021500324999999543 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf504 3.61386264477 0 84.338499175 0.6322512375000144 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf505 2.3293565891 0 84.847500825 0.3124991750000078 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf506 3.0527280537 0 84.755499425 0.4045005750000087 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf507 2.25112114639 0 85.3159997 0.14400029999999903 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf508 2.59591050603 0 84.5725002 0.5874998000000119 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf509 2.21727076111 0 84.6444996 0.515500400000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf510 3.51061454803 0 84.459499925 0.4507501125000033 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf511 2.37819629574 0 84.42350025 0.5047496250000023 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf512 3.11726491618 0 84.47399915 0.6860008500000078 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf513 2.93135224398 0 84.58050005 0.5794999500000074 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf514 3.36634957954 0 84.510999775 0.6490002250000032 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf515 2.48647461628 0 84.774499375 0.38550062500000026 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf516 2.23579750603 0 84.407000725 0.5294989125000029 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf517 2.71542297875 0 85.471001025 -0.011001025000001607 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf518 2.40535258985 0 84.3829998 0.5655003000000178 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf519 2.90250153505 0 85.4464989 0.013501100000010535 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf520 3.21935410129 0 84.5264997 0.6335003000000029 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf521 2.52912274255 0 84.368500525 0.5872492125000051 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf522 2.73050807996 0 84.839500775 0.3204992250000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf523 2.29313137734 0 85.28849955 0.17150045000000774 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf524 2.66632133899 0 84.5649994 0.5950006000000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf525 3.25488617683 0 84.5769993 0.583000700000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf526 4.11336811599 0 84.4165001 0.5152498500000178 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf527 3.93186488986 0 84.1989996 0.8415006000000176 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf528 3.26974377829 0 84.248499875 0.7672501875000179 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf529 3.35522632718 0 85.427999675 0.03200032500000988 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf530 3.51519975253 0 85.325000175 0.13499982500000557 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf531 2.54659446441 0 85.07150035 0.08849965000001137 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf532 3.40166834257 0 84.99499965 0.16500035000000823 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf533 4.42774475186 0 84.049499125 1.0657513125000122 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf534 3.23736286274 0 83.677999875 1.6230001875000113 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf535 3.08503657926 0 84.559999775 0.6000002250000108 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf536 2.43921651142 0 84.4750008 0.6849992000000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf537 2.63539656996 0 83.5919994 1.7520008999999988 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf538 4.68074200425 0 83.95500035 1.2074994749999988 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf539 3.26974377829 0 85.361000825 0.09899917500000016 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf540 3.21617930472 0 84.7574993 0.4025006999999988 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf541 3.15770925683 0 84.638499425 0.5215005749999989 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf542 3.55760541076 0 85.297001425 0.1629985750000003 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf543 2.45955992282 0 85.02450045 0.13549955000000014 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf544 4.45514111937 0 84.635499725 0.5245002750000026 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf545 3.65869733242 0 85.610500525 -0.15050052500000105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf546 1.98330178402 0 83.787500225 1.4587496625000043 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf547 3.47749717017 0 85.6019997 -0.14199969999998813 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf548 3.67738324523 0 85.27900045 0.18099955000000706 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf549 3.70045496902 0 85.3585005 0.10149949999999991 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf550 3.28789331155 0 85.61950085 -0.1595008499999892 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf551 4.51654937482 0 84.5564993 0.6035007000000064 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf552 5.29506075557 0 84.062999625 1.0455005625000027 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf553 4.68889675944 0 84.380499025 0.5692514624999987 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf554 2.86615984136 0 83.78750105 1.4587484250000031 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf555 2.65699343733 0 84.853500125 0.30649987500000864 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf556 2.38015655098 0 84.0074999 1.128750150000009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf557 3.26974377829 0 85.22600125 0.2339987500000092 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf558 3.28789331155 0 85.26200045 0.19799955000000297 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf559 2.97001320307 0 85.713000675 -0.2530006749999984 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf560 3.266045536 0 83.7365 1.5352499999999978 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf561 2.32402535693 0 85.8155004 -0.35550039999999966 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf562 2.97307110165 0 84.287499875 0.7087501875000157 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf563 3.32496313987 0 85.240500875 0.21949912500001006 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf564 3.21617930472 0 84.58050025 0.5794997500000051 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf565 2.8168240943 0 83.5575006 1.8037491000000117 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf566 2.80077299026 0 84.7334995 0.4265005000000116 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf567 4.42174155156 0 84.5384996 0.6215004000000107 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf568 4.749306487 0 84.507498925 0.6525010750000121 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf569 3.10094560281 0 85.609499925 -0.14949992499998926 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf570 2.31844697619 0 85.093500125 0.06649987499999954 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf571 2.20731326885 0 85.1330001 0.026999900000001298 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf572 2.80851950068 0 83.83300075 1.390498875000013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf573 3.13089004774 0 84.530499925 0.6295000750000043 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf574 2.94339750184 0 84.789 0.37100000000000366 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf575 2.96673931882 0 84.37349995 0.579750075000014 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf576 3.05595874161 0 85.299 0.16099999999999853 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf577 2.29874094265 0 84.173999425 0.8790008624999999 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf578 2.89992066892 0 85.835000775 -0.37500077499999235 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf579 1.88691288205 0 85.365000175 0.09499982499999932 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf580 3.19188638661 0 83.9349999 1.2375001500000167 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf581 3.15770925683 0 83.932000175 1.2419997375000094 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf582 3.65459743556 0 85.2625 0.19750000000000228 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf583 4.48904909382 0 84.575000775 0.5849992249999986 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf584 3.17463312783 0 85.314500525 0.14549947500000543 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf585 4.46188167251 0 84.81450025 0.34549975000001043 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf586 3.30262160909 0 83.46249945 1.9462508250000141 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf587 2.44755104472 0 84.18550015 0.861749775000014 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf588 2.27743827873 0 85.0779999 0.08200010000001046 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf589 5.30367177341 0 83.246999525 2.2695007124999975 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf590 5.30367177341 0 83.489000525 1.906499212500016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf591 5.28466361659 0 83.276498925 2.22525161250001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf592 2.93135224398 0 83.3195003 2.1607495500000056 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf593 3.39354810299 0 82.8199997 2.9100004500000125 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf594 4.49523657385 0 83.3474991 2.118751350000018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 7 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf595 5.03217204616 0 83.44649945 1.9702508250000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf596 4.00665156404 0 83.358499125 2.102251312500016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf597 2.90250153505 0 85.5110003 -0.051000300000001164 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf598 2.80057492256 0 83.1685006 2.3872491000000053 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf599 5.30367177341 0 83.4164997 2.0152504500000035 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf600 3.08465896376 0 83.0965006 2.4952491000000094 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf601 2.99080920887 0 83.1744995 2.3782507500000136 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf602 4.74094044098 0 83.113999725 2.4690004125000087 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf603 4.56481779115 0 83.7375002 1.5337497000000084 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf604 3.25488617683 0 85.93800085 -0.47800084999998943 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf605 4.95606048827 0 83.646 1.6710000000000065 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf606 3.46931522498 0 84.030500025 1.0942499625000153 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf607 3.47348842326 0 82.962000275 2.6969995875000166 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf608 5.1195298246 0 83.403000875 2.0354986875000023 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf609 3.31339824504 0 84.685499025 0.47450097500000654 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf610 5.03217204616 0 83.4554993 1.9567510500000083 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf611 3.68613613324 0 83.4790005 1.9214992500000108 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf612 4.91476285223 0 83.479000075 1.9214998875000049 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf613 3.80995316188 0 83.221500025 2.3077499625000115 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf614 4.17689583184 0 83.719001025 1.561498462500012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf615 4.93119904091 0 83.428499825 1.9972502625000033 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf616 3.17089287752 0 84.216499775 0.8152503375000038 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf617 4.53672840544 0 83.583499825 1.7647502625000016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf618 4.75623276948 0 83.30349945 2.184750825000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf619 4.94774553187 0 83.322999425 2.1555008624999985 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf620 5.1195298246 0 83.27599975 2.2260003750000124 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf621 5.40080120652 0 83.302000575 2.186999137500017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf622 3.14057344002 0 83.0755001 2.5267498500000087 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf623 4.13009860961 0 83.785000275 1.4624995875000053 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf624 3.23702263508 0 85.207499875 0.2525001250000088 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf625 3.75178902153 0 84.038999775 1.0815003375000174 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf626 2.70799566698 0 83.4659988 1.9410018000000164 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf627 5.40080120652 0 83.39550015 2.046749775000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf628 4.56481779115 0 83.61799965 1.7130005250000053 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf629 4.06183643479 0 83.447999075 1.96800138750001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf630 3.42102082784 0 83.98699975 1.1595003750000146 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf631 4.75623276948 0 83.5254996 1.8517506000000026 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf632 2.44524097268 0 83.49900015 1.8914997750000069 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf633 2.86305165382 0 85.425500675 0.03449932500001013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf634 4.46799443574 0 83.6610006 1.6484991000000164 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf635 2.65671333449 0 84.918999 0.24100100000000568 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf636 3.32835645011 0 84.2724993 0.7312510499999973 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 7 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf637 2.59591050603 0 83.733500725 1.5397489125000092 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf638 4.75623276948 0 83.531999775 1.8420003375000036 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf639 3.38567327954 0 84.47250065 0.6874993500000045 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf640 2.81925910919 0 83.356000325 2.105999512500013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf641 4.00665156404 0 83.6824997 1.6162504500000168 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf642 4.56481779115 0 83.063499975 2.544750037500009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf643 4.49523657385 0 83.25949975 2.2507503750000026 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf644 4.3751931669 0 83.59049985 1.754250225000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf645 3.11726491618 0 83.152500325 2.4112495124999995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf646 3.93186488986 0 83.749999325 1.5150010125000009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf647 4.09568311293 0 83.209001025 2.3264984624999983 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf648 3.35868172117 0 84.0900003 1.004999550000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf649 2.81925910919 0 84.555999975 0.6040000249999992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf650 2.42513277215 0 84.331499675 0.6427504875000025 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf651 1.90256982287 0 84.902999375 0.257000625000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf652 3.51061454803 0 84.4349997 0.48750044999999886 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf653 1.70856970404 0 85.779999 -0.31999899999999853 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf654 2.48647461628 0 84.258500325 0.7522495125000077 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf655 2.64243046275 0 85.2780003 0.18199970000000293 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf656 3.25488617683 0 84.5834995 0.5765005000000031 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf657 2.65671333449 0 85.422499975 0.03750002500001132 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf658 2.90250153505 0 85.48449975 -0.02449974999999255 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf659 3.40522010623 0 84.250000125 0.7649998125000081 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf660 1.70856970404 0 83.979996 1.1700060000000079 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf661 2.20731326885 0 85.229500375 0.23049962500000448 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf662 2.46568541903 0 84.5034996 0.6565004000000073 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf663 2.69064816356 0 84.5959997 0.5640003000000121 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf664 2.64243046275 0 85.358000375 0.10199962500000198 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf665 2.08260997416 0 84.921000775 0.23899922500000914 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf666 2.00610810282 0 85.678000625 -0.21800062499999057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf667 3.51061454803 0 84.4344999 0.48825014999999894 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf668 2.50761735877 0 84.554999925 0.605000075000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf669 2.50128201583 0 84.257000525 0.754499212500015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf670 2.09742183942 0 84.831500325 0.32849967500001187 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf671 2.09742183942 0 85.82949995 -0.36949994999999375 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf672 3.36634957954 0 84.42999935 0.4950009750000035 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf673 2.90250153505 0 85.48999975 -0.02999974999999039 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf674 2.09298780883 0 84.546000375 0.6139996249999996 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf675 2.44524097268 0 85.437500525 0.022499475000000768 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf676 2.64243046275 0 84.79750005 0.3624999500000087 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf677 2.42513277215 0 84.43400065 0.48899902500000536 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf678 1.77010128766 0 84.7734991 0.38650090000001003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf679 2.61896392029 0 84.401499175 0.5377512375000109 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf680 2.08260997416 0 85.7424997 -0.2824996999999911 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf681 2.31110203954 0 84.53350035 0.6264996500000081 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf682 2.31110203954 0 84.32900035 0.6464994750000059 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf683 2.40535258985 0 84.41899995 0.5115000750000078 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf684 2.36674469012 0 84.894500225 0.26549977500000355 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf685 1.79160865678 0 85.339996 0.12000400000000583 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf686 2.08260997416 0 85.585000675 -0.12500067499999828 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf687 3.55290898553 0 84.268999875 0.736500187499999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf688 2.86305165382 0 84.4700005 0.6899995000000075 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf689 2.29313137734 0 84.528500025 0.6314999750000055 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf690 2.37819629574 0 84.4455007 0.4717489500000127 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf691 3.36634957954 0 84.542 0.6180000000000035 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf692 3.26208692053 0 84.27099965 0.7335005250000179 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf693 2.29313137734 0 84.73400045 0.4259995500000088 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf694 3.02145143763 0 84.648499925 0.5115000750000093 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf695 2.520476477 0 85.5895 -0.1294999999999959 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf696 2.57325940549 0 85.60300025 -0.14300024999998867 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf697 3.1883620737 0 85.56250075 -0.10250074999999298 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf698 3.06230535434 0 85.831000325 -0.37100032499999996 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf699 3.26931959511 0 84.712999925 0.4470000749999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf700 1.99673018213 0 83.7179994 1.563000900000013 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf701 2.47192966909 0 84.130000425 0.944999362499999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf702 3.54904262632 0 84.3324999 0.6412501500000047 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf703 2.41741365769 0 84.94699985 0.21300015000000772 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf704 2.31844697619 0 85.657000325 -0.19700032499999337 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf705 3.04014002515 0 83.450499725 1.9642504125000073 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf706 4.27374553867 0 83.857000625 1.354499062500011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf707 2.96078226017 0 83.80250075 1.4362488750000182 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf708 3.21935410129 0 83.672501175 1.6312482375000172 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf709 2.01859798873 0 83.3645002 2.0932497000000154 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf710 3.09443798634 0 83.43500025 1.9874996250000052 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf711 2.63326868545 0 83.457499725 1.9537504124999998 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf712 2.94311625424 0 86.09950025 -0.6395002500000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf713 3.06230535434 0 86.115999225 -0.6559992249999909 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf714 4.24911447842 0 83.793999475 1.4490007874999975 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf715 3.31003535109 0 83.8900007 1.3049989500000052 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf716 3.62274145966 0 83.74299865 1.5255020250000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf717 2.96078226017 0 84.0415006 1.0777490999999984 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf718 2.39576934005 0 84.632000575 0.5279994249999987 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf719 3.93186488986 0 83.845000625 1.3724990625000117 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf720 3.84520265677 0 84.046999725 1.0695004124999983 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf721 2.49704621765 0 84.4030001 0.5354998500000079 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf722 2.65699343733 0 85.916499875 -0.4564998749999944 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf723 2.73080396045 0 85.85699995 -0.39699994999999716 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf724 4.21872205213 0 83.792000375 1.4519994375000067 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf725 2.95787581321 0 86.111499675 -0.6514996749999995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf726 2.38015655098 0 84.557498925 0.6025010750000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf727 3.25164094825 0 84.9199999 0.2400001000000117 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf728 2.35356305584 0 85.23000035 0.22999965000000772 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf729 4.06183643479 0 83.7469994 1.519500899999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf730 2.40535258985 0 84.8959992 0.2640007999999995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf731 3.1208796497 0 85.665500225 -0.20550022499999726 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf732 3.15770925683 0 85.61300085 -0.1530008500000008 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf733 3.88013266186 0 83.951999825 1.2120002625000055 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf734 3.3138339442 0 85.405999775 0.054000225000007174 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf735 3.34760900076 0 85.161500625 0.29849937500000295 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf736 4.18338826327 0 84.270000575 0.7349991375000116 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf737 2.49092077818 0 83.781500625 1.4677490624999976 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf738 3.02145143763 0 85.3139999 0.14600010000000624 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf739 3.09112547159 0 84.4004995 0.539250750000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf740 4.94774553187 0 83.575499 1.7767515000000174 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf741 3.79036576283 0 84.13149995 0.9427500749999993 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf742 3.9635717019 0 83.76249945 1.496250824999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf743 3.1208796497 0 83.843999825 1.3740002625000116 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf744 2.36502838432 0 86.03249995 -0.5724999499999968 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf745 4.46799443574 0 83.862499775 1.3462503375000026 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf746 3.09112547159 0 85.35899945 0.10100055000000624 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf747 3.13050112438 0 85.693498925 -0.23349892499999497 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf748 3.93186488986 0 83.7594999 1.500750150000016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf749 3.31003535109 0 85.620500375 -0.16050037500000086 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf750 2.34234689252 0 84.826500525 0.333499475000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf751 3.34760900076 0 83.8520003 1.3619995500000073 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf752 4.78009885505 0 83.989999425 1.1550008625000174 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf753 2.2649524773 0 84.44999965 0.4650005250000149 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf754 4.13009860961 0 83.844999475 1.372500787500016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf755 3.17089287752 0 84.80849995 0.35150005000000706 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 5 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf756 3.22639831866 0 84.29249985 0.7012502250000097 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf757 3.02484725731 0 83.689999175 1.6050012375000122 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf758 4.27374553867 0 83.828500725 1.397248912500011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf759 2.23426580428 0 84.93800035 0.22199965000000932 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf760 4.33659088676 0 83.9419998 1.2270003000000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf761 2.50340530846 0 83.616499725 1.715250412500012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf762 3.31339824504 0 84.70049985 0.4595001500000052 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf763 3.17437865535 0 83.529500175 1.8457497375000145 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf764 2.56430926229 0 84.0064993 1.1302510500000054 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf765 3.30262160909 0 83.82300065 1.4054990250000117 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf766 2.51642672638 0 83.737999875 1.5330001875000079 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf767 3.09150467208 0 84.5830006 0.5769993999999997 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf768 2.14786504266 0 84.8885 0.27150000000001173 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf769 2.8439887133 0 83.5995003 1.7407495500000039 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf770 4.53042625525 0 84.394499275 0.548251087500006 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf771 5.02441943023 0 84.026499275 1.1002510874999984 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf772 2.83306023509 0 84.5554991 0.6045008999999993 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf773 3.62274145966 0 84.25650025 0.7552496250000047 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf774 3.9162009563 0 85.016500125 0.14349987500001193 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 4 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf775 3.40522010623 0 84.588500025 0.5714999750000033 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf776 4.71064890814 0 83.615999425 1.7160008625000103 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf777 2.87421319527 0 83.48200005 1.9169999250000131 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf778 3.99579404276 0 85.31950015 0.14049985000000903 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf779 4.28616846517 0 83.094000975 2.4989985375000074 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf780 3.23702263508 0 84.514499575 0.645500425000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf781 4.74857429339 0 83.713999775 1.5690003375000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf782 3.60502724486 0 84.59950105 0.5604989500000045 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf783 3.08465896376 0 84.54 0.6199999999999989 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf784 2.59591050603 0 83.83300055 1.3904991750000164 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf785 4.46123541487 0 84.034000175 1.0889997375000036 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf786 4.22476570574 0 83.06150035 2.5477494750000034 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf787 2.56430926229 0 84.01500055 1.1174991750000132 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf788 3.02145143763 0 83.44000055 1.9799991750000174 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf789 3.40522010623 0 84.200000775 0.8399988374999978 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf790 3.90066133282 0 83.974501425 1.1782478625000081 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf791 2.73557376185 0 83.05799955 2.553000675 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf792 2.76636700953 0 85.455001425 0.004998574999999061 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf793 3.47348842326 0 82.9944988 2.648251800000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf794 4.14169933128 0 83.0104999 2.624250150000009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf795 4.74857429339 0 83.849500325 1.3657495125000167 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf796 4.91476285223 0 83.179499625 2.3707505624999996 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf797 4.2552455881 0 84.835999775 0.32400022500000036 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 4 5 promise swing_level 6 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf798 2.76636700953 0 83.53999915 1.8300012750000079 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf799 4.22476570574 0 83.30599935 2.1810009750000177 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf800 3.6587939114 0 83.05299955 2.5605006750000143 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 3 5 promise swing_level 6 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf801 4.09568311293 0 84.89399985 0.266000150000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 5 promise swing_level 6 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf802 4.2552455881 0 85.200001375 0.25999862500000576 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 4 5 promise swing_level 6 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf803 2.46568541903 0 83.904499675 1.2832504875000126 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf804 4.74094044098 0 82.86699975 2.839500375 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 6 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf805 4.11281886651 0 85.239499525 0.22050047500000575 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 4 5 promise swing_level 6 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf806 4.64388818634 0 83.799499325 1.4407510125000087 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf807 4.78711530883 0 83.751000675 1.5134989875000073 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf808 4.63658695371 0 83.893499875 1.2997501875000026 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf809 1.97124355876 0 83.85899975 1.3515003750000147 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf810 3.98499520747 0 83.9450001 1.2224998500000055 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf811 3.52741090964 0 84.077000975 1.0244985375000013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf812 2.99080920887 0 84.590000025 0.5699999750000103 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf813 4.60761017185 0 83.8474998 1.3687503000000163 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf814 4.06183643479 0 84.022999375 1.105500937500011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf815 3.34724520741 0 84.5710008 0.588999200000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf816 4.00665156404 0 83.653000675 1.6604989875000058 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf817 4.23690511031 0 83.977000425 1.1744993625000077 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf818 2.71542297875 0 84.7090005 0.4509995000000032 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf819 3.15731364232 0 84.37249985 0.5812502250000122 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf820 3.48571779877 0 84.40899965 0.5265005250000101 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf821 3.51061454803 0 84.460999725 0.6990002750000116 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf822 3.51061454803 0 84.4284992 0.4972512000000009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf823 3.25488617683 0 84.582499525 0.5775004750000022 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf824 2.79256263316 0 84.7004999 0.4595001000000082 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf825 2.52912274255 0 84.343999825 0.6240002625000116 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf826 2.57325940549 0 85.587500225 -0.1275002249999943 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf827 2.08260997416 0 85.6949997 -0.23499969999999165 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf828 2.71542297875 0 84.482500075 0.6774999250000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf829 2.15883788221 0 84.616500025 0.5434999750000117 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf830 3.61386264477 0 84.2965002 0.6952497000000122 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf831 3.36634957954 0 84.5810001 0.578999900000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf832 2.48647461628 0 84.261000025 0.7484999625000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf833 3.25488617683 0 84.6624995 0.49750050000000956 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf834 2.79256263316 0 84.6570003 0.502999699999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf835 2.81925910919 0 85.500000125 -0.04000012499999456 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf836 2.21727076111 0 84.65950015 0.5004998500000056 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf837 2.90250153505 0 85.5304994 -0.07049939999999139 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf838 3.38956467165 0 84.506500075 0.6534999249999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf839 3.21935410129 0 84.40149955 0.5377506750000123 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf840 2.23579750603 0 84.38950035 0.5557494749999989 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf841 3.38956467165 0 84.5390001 0.6209999000000096 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf842 2.66632133899 0 85.676500775 -0.2165007749999887 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf843 2.81925910919 0 84.6275007 0.5324993000000063 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf844 1.70856970404 0 85.419998 0.04000199999999837 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf845 3.52741090964 0 84.5234993 0.6365007000000077 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf846 4.50209909388 0 83.777499825 1.4737502624999976 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf847 5.03217204616 0 83.9095009 1.2757486500000113 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf848 4.92375419354 0 84.24299925 0.7755011250000123 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf849 3.46931522498 0 84.1740002 0.8789997000000156 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf850 4.27374553867 0 83.932 1.2420000000000044 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf851 4.29300531776 0 84.79499985 0.36500015000000874 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf852 5.20997422309 0 83.6445001 1.6732498500000048 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf853 4.68074200425 0 83.96350045 1.1947493250000107 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf854 4.10709103894 0 84.11049995 0.9742500750000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf855 3.70045496902 0 85.319000275 0.14099972500001173 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf856 2.50336009449 0 83.910500825 1.274248762500008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf857 4.23690511031 0 83.625999475 1.701000787500007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf858 4.8661051796 0 83.69950015 1.590749774999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf859 2.28075482883 0 84.09450065 0.9982490250000069 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf860 5.02441943023 0 84.301500975 0.6877485375000134 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf861 5.11150591832 0 84.06200025 1.0469996250000122 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf862 4.42174155156 0 84.074000375 1.028999437500012 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf863 3.88013266186 0 84.0149998 1.1175003000000103 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf864 3.42944293235 0 84.10900035 0.9764994750000042 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf865 3.62274145966 0 84.2265001 0.8002498500000144 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf866 3.3549420805 0 85.2805001 0.17949990000000754 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf867 5.29506075557 0 84.064000875 1.0439986875000002 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf868 1.97658461885 0 84.076500575 1.025249137500012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf869 3.65459743556 0 84.115999575 0.9660006375000023 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf870 4.67403368929 0 84.097999925 0.9930001125000132 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf871 2.55100017838 0 84.878999625 0.2810003749999993 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf872 4.81915540131 0 84.227499325 0.7987510125000128 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf873 3.34760900076 0 83.82450065 1.403249025000001 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf874 5.11150591832 0 84.492499725 0.6675002750000033 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf875 2.46382267142 0 85.919500175 -0.4595001749999909 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf876 4.94025067672 0 84.42749955 0.4987506750000179 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf877 5.20166453319 0 84.06699965 1.0395005250000082 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf878 4.85885544059 0 84.432499275 0.4912510875000109 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf879 2.9200817227 0 85.785500925 -0.32550092499998867 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf880 4.46188167251 0 84.607499625 0.5525003750000025 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf881 3.39002063361 0 85.37800025 0.08199975000000564 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf882 4.85885544059 0 84.611500325 0.5484996750000107 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf883 4.23690511031 0 83.910499925 1.2742501125000132 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf884 3.89600156508 0 84.11800005 0.9629999249999983 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf885 4.94025067672 0 84.202500325 0.8362495125000038 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf886 5.20166453319 0 84.29650075 0.6952488749999972 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf887 1.89161252668 0 83.965499075 1.1917513875000125 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf888 4.78009885505 0 84.21499925 0.8175011249999997 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf889 5.1195298246 0 83.801499525 1.4377507125000122 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf890 4.749306487 0 83.99499935 1.147500975000007 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf891 4.749306487 0 84.472999575 0.6870004250000022 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf892 4.85885544059 0 84.128499925 0.9472501125000079 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf893 4.63658695371 0 83.60299915 1.7355012750000043 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf894 4.03958106713 0 84.0860001 1.0109998499999975 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf895 4.0017352815 0 84.55099985 0.6090001500000085 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf896 2.12312397991 0 83.715999125 1.5660013125000134 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf897 3.48571779877 0 84.22900075 0.7964988750000117 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf898 3.09112547159 0 85.610999925 -0.15099992499999643 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf899 4.94774553187 0 83.7374994 1.5337509000000011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf900 4.40131209873 0 83.785500375 1.4617494375000106 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf901 4.749306487 0 84.460999675 0.6990003250000086 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf902 3.09150467208 0 84.3419994 0.6270008999999988 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf903 2.1110784986 0 85.57450055 -0.1145005499999911 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf904 3.08174415116 0 84.675499325 0.48450067500000105 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf905 3.61386264477 0 84.1864996 0.8602506000000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf906 3.51061454803 0 83.785001375 1.462497937500018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf907 3.17129190255 0 84.4570002 0.4544997000000137 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf908 2.96673931882 0 84.529999 0.6300010000000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf909 4.48904909382 0 84.50700015 0.652999850000009 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf910 3.52359981675 0 85.515999775 -0.05599977499999226 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf911 3.56620961403 0 83.5610005 1.798499249999999 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf912 3.79571140468 0 84.180500175 0.8692497374999988 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf913 5.1195298246 0 83.684500775 1.6132488375000023 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf914 4.42774475186 0 83.99950025 1.1407496250000122 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf915 4.20069439548 0 83.33799975 2.133000375000016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf916 5.48110429858 0 82.92599965 2.7510005250000162 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf917 4.48157410599 0 84.8595009 0.3004991000000047 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 5 promise swing_level 6 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf918 3.43305296381 0 82.986499725 2.6602504125000053 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf919 3.07180677592 0 85.713500825 -0.2535008249999919 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf920 2.80851950068 0 83.7929987 1.4505019500000103 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf921 2.65915985321 0 82.96049895 2.6992515750000052 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf922 4.36224960626 0 83.6820007 1.6169989500000028 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf923 2.55100017838 0 83.695000375 1.597499437499998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf924 4.23690511031 0 83.8964995 1.295250750000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf925 3.20891955001 0 83.2314995 2.2927507500000104 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf926 4.55072975326 0 84.80150085 0.35849915000000865 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf927 2.92553630931 0 83.336999875 2.1345001875000023 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf928 4.49523657385 0 83.916500425 1.2652493625000147 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf929 5.50155459048 0 83.51249965 1.8712505250000149 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf930 3.67694425138 0 84.235999875 0.7860001875000009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf931 4.93119904091 0 82.9615 2.6977500000000063 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf932 4.0900028821 0 83.89899975 1.2915003750000054 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf933 4.22476570574 0 83.745500275 1.521749587500011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf934 3.80013422222 0 84.2410002 0.7784997000000047 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf935 5.30367177341 0 82.856500275 2.855249587500005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf936 3.25488617683 0 84.620499575 0.5395004250000085 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf937 2.96979047613 0 83.305499825 2.1817502625000103 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf938 2.73557376185 0 83.313000675 2.170498987500011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf939 3.43696886837 0 84.61449925 0.5455007500000107 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf940 2.97272039686 0 83.092500525 2.5012492124999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf941 3.99579404276 0 82.91350005 2.7697499250000135 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf942 3.80013422222 0 84.844499225 0.3155007749999982 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf943 2.32388897879 0 83.23450085 2.2882487250000025 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf944 2.06369518543 0 83.706999925 1.5795001124999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf945 2.65671333449 0 83.801499775 1.4377503375000131 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf946 4.78711530883 0 84.057999775 1.0530003375000092 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf947 4.55072975326 0 83.11199915 2.4720012750000038 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf948 4.55072975326 0 84.099999525 0.9900007125000059 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf949 3.57011348802 0 84.12449885 0.9532517250000154 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf950 2.64243046275 0 84.739500725 0.42049927500000595 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf951 4.6659305061 0 83.43800025 1.982999625000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf952 3.23709823473 0 83.22099955 2.3085006750000048 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf953 2.08260997416 0 85.3160003 0.14399970000000623 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf954 2.50761735877 0 84.571500425 0.5884995750000087 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf955 2.25112114639 0 85.2259992 0.23400080000000117 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf956 2.75602585333 0 84.68899955 0.4710004499999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf957 2.90250153505 0 85.53949975 -0.07949974999999937 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf958 2.69064816356 0 84.621499775 0.5385002250000014 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf959 2.93135224398 0 85.440000125 0.019999875000007716 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf960 2.57325940549 0 85.5850002 -0.12500019999999096 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf961 3.51061454803 0 84.41399995 0.519000075000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf962 2.22395915051 0 85.238499825 0.2215001749999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf963 2.79256263316 0 84.66099955 0.4990004500000055 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf964 3.36634957954 0 84.44199985 0.4770002250000047 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf965 2.73050807996 0 84.76400015 0.39599985000000404 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf966 2.54220394201 0 84.27050005 0.7342499250000145 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf967 1.77010128766 0 84.79199985 0.36800015000000885 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf968 1.87955651685 0 84.387500975 0.5587485375000156 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf969 3.51061454803 0 84.443000275 0.47549958750000343 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf970 2.57325940549 0 85.5029995 -0.04299949999999625 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf971 2.31110203954 0 84.367001075 0.5894983875000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf972 3.04014002515 0 84.583499675 0.5765003250000064 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf973 2.520476477 0 84.920999525 0.23900047500000599 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf974 2.23579750603 0 84.410500025 0.5242499625000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf975 2.87421319527 0 84.518499575 0.6415004249999982 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf976 3.46931522498 0 84.362999925 0.5955001125000123 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf977 2.81925910919 0 85.587499775 -0.1274997749999926 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf978 2.23579750603 0 84.61800015 0.5419998500000048 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf979 2.09742183942 0 85.799500125 -0.33950012499998933 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf980 2.12156410089 0 85.204999775 0.2550002250000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf981 2.25112114639 0 85.313998025 0.1460019750000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf982 2.22395915051 0 85.109500725 0.0504992750000014 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf983 3.36634957954 0 84.5195002 0.6404998000000092 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf984 1.88313156795 0 84.220001 0.8099985000000132 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf985 2.48647461628 0 84.275000025 0.727499962500012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf986 3.11726491618 0 84.408999825 0.526500262500015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf987 1.63288729942 0 85.540001 -0.0800009999999986 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf988 2.46568541903 0 84.273999025 0.7290014625000154 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf989 2.25801563131 0 84.59349955 0.5665004500000009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf990 3.39002063361 0 85.404500225 0.05549977499999842 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf991 4.8661051796 0 83.647499625 1.6687505625000156 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf992 3.42944293235 0 83.4234997 2.0047504500000173 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf993 4.46799443574 0 83.8705004 1.3342494000000116 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf994 4.05678387799 0 84.88300015 0.27699985000000427 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf995 4.98960168004 0 83.691000175 1.6034997375000088 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf996 3.3549420805 0 85.6950002 -0.2350001999999904 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf997 4.85885544059 0 84.159999825 0.9000002625000079 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf998 4.42174155156 0 84.0690004 1.036499400000018 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf999 4.57189457086 0 83.5935001 1.7497498500000077 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1000 4.00717282134 0 84.706499675 0.4535003250000017 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1001 3.42944293235 0 84.19900035 0.8414994749999991 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1002 4.61551188309 0 84.13349915 0.939751274999999 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1003 4.56549440949 0 84.69500005 0.4649999500000007 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1004 2.45770641547 0 84.9455009 0.21449910000000616 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1005 4.50209909388 0 84.090499975 1.0042500375000074 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1006 3.62274145966 0 84.25799995 0.7530000750000099 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1007 3.35149437292 0 85.331499925 0.12850007500000232 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1008 4.42174155156 0 84.472500425 0.6874995749999983 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1009 3.42944293235 0 84.1065001 0.9802498499999999 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1010 4.17689583184 0 83.5660006 1.7909991000000147 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1011 2.69064816356 0 85.183000025 0.2769999750000068 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1012 3.03418372506 0 84.42499865 0.5025020249999983 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1013 5.06758855666 0 84.20349975 0.8347503749999987 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1014 2.28268924961 0 84.152999325 0.9105010125000135 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1015 4.84289719126 0 84.198500425 0.8422493624999987 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1016 2.95217432031 0 84.621499975 0.538500024999999 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1017 5.11150591832 0 84.08950015 1.0057497749999982 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1018 5.20166453319 0 84.322499275 0.65625108750001 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1019 3.65459743556 0 85.40350105 0.056498950000002435 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1020 5.24794714078 0 84.1174996 0.9637506000000045 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1021 4.48904909382 0 83.950000025 1.2149999625000163 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1022 1.91370764343 0 84.51899985 0.641000150000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1023 4.94025067672 0 84.159999475 0.900000787499998 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1024 5.20997422309 0 83.6000002 1.7399997000000127 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1025 1.94407886078 0 84.4380006 0.4829991000000149 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1026 4.63000451649 0 84.37299955 0.5805006750000032 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1027 3.48571779877 0 84.536999725 0.623000275000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1028 4.70385465798 0 84.17350025 0.8797496250000023 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1029 3.54863373783 0 85.508500075 -0.04850007499999548 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1030 2.54246041813 0 84.002999975 1.135500037500016 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1031 3.28030657478 0 83.732499825 1.5412502625000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1032 4.38883904114 0 84.404000025 0.5339999625000047 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1033 4.94025067672 0 84.49599915 0.6640008500000022 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1034 2.7823322265 0 83.805999775 1.4310003375000022 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1035 5.29506075557 0 84.063499725 1.044750412500008 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1036 2.35939223224 0 84.255000275 0.757499587500007 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1037 4.45514111937 0 83.9215 1.2577500000000157 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1038 4.42174155156 0 84.553499025 0.6065009750000115 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1039 4.78711530883 0 83.76300065 1.495499025000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1040 4.77162407078 0 83.67400035 1.6289994750000076 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1041 4.85885544059 0 84.3159998 0.6660003000000074 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1042 4.63658695371 0 83.69750025 1.593749625000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1043 4.74167028213 0 84.574500575 0.5854994250000033 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1044 2.94605637042 0 85.681999675 -0.221999674999995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1045 2.97872563985 0 84.83500035 0.32499965000000375 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1046 4.23690511031 0 83.579999525 1.7700007125 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1047 5.11150591832 0 84.190999575 0.853500637499998 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1048 2.27543803127 0 83.7610006 1.4984991000000036 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1049 4.70385465798 0 84.326499375 0.6502509375000116 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1050 3.89600156508 0 84.084000775 1.0139988374999973 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1051 2.75602585333 0 83.7834995 1.4647507500000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1052 3.08174415116 0 84.698999775 0.4610002250000008 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1053 3.23388836206 0 84.3539997 0.6090004500000035 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1054 3.25488617683 0 85.49199935 -0.031999349999995264 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1055 3.11726491618 0 83.722501 1.5562485000000166 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1056 4.09568311293 0 84.7879989 0.3720011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1057 4.68074200425 0 82.839501025 2.8807484625000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1058 4.21872205213 0 82.899499925 2.7907501125000067 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 5 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1059 4.63658695371 0 83.758500025 1.5022499625000023 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1060 4.17689583184 0 83.2039997 2.334000450000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1061 3.59623494226 0 83.256499825 2.255250262499999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1062 2.15414066881 0 83.709500275 1.5757495875000131 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1063 4.68074200425 0 83.797000425 1.444499362500018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1064 4.23690511031 0 84.951499925 0.208500075000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1065 2.81925910919 0 85.541500325 -0.0815003249999961 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1066 3.18458942266 0 83.054500675 2.558248987500008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1067 4.56481779115 0 84.1305011 0.9442483500000023 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1068 3.51061454803 0 84.48900035 0.6709996500000074 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1069 4.23690511031 0 83.927499375 1.248750937500013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1070 3.58748542243 0 84.0855007 1.0117489500000119 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1071 2.93135224398 0 83.9939995 1.1490007500000061 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1072 2.93135224398 0 83.29949975 2.1907503750000146 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1073 3.57011348802 0 84.0904995 1.004250749999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1074 3.32835645011 0 84.032000125 1.0919998125000134 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1075 4.60761017185 0 83.177500175 2.373749737499999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1076 4.60761017185 0 82.949500075 2.715749887500003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1077 4.78711530883 0 83.34050005 2.1292499250000034 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1078 4.49523657385 0 83.710001025 1.5749984625000124 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1079 4.52975999063 0 83.673999975 1.6290000375000062 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1080 4.71064890814 0 83.7225004 1.5562494000000058 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1081 3.22287236085 0 85.8445007 -0.3845006999999924 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1082 4.42774475186 0 83.860500775 1.3492488374999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1083 3.59194793631 0 82.866000325 2.840999512500005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1084 2.95491207034 0 82.98349935 2.664750975000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1085 4.53672840544 0 83.43900085 1.9814987250000087 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1086 4.01210248164 0 83.82750075 1.3987488750000097 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1087 4.93119904091 0 82.779999175 2.970001237500007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 5 4 promise swing_level 5 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1088 2.94605637042 0 85.023499225 0.13650077500001034 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 6 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1089 3.61386264477 0 84.471500375 0.6884996250000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1090 4.23690511031 0 85.153500175 0.006499825000000237 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1091 3.73279362334 0 84.19550035 0.8467494750000029 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1092 3.79036576283 0 83.019999925 2.6100001125000176 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1093 2.91115808335 0 82.817999575 2.913000637500005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1094 4.13009860961 0 83.7584997 1.5022504500000053 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1095 2.79256263316 0 85.383500175 0.07649982500001046 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1096 3.35104871505 0 84.316499925 0.6652501125000043 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1097 2.54220394201 0 84.5500002 0.6099998000000056 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1098 2.57325940549 0 85.577000575 -0.11700057499999444 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1099 3.0527280537 0 84.7215004 0.43849960000000865 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1100 2.90250153505 0 85.438500075 0.02149992500001191 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1101 2.90250153505 0 85.4419999 0.018000100000006125 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1102 3.36634957954 0 84.4759992 0.6840008000000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1103 2.57325940549 0 85.649500525 -0.18950052499998832 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1104 3.17089287752 0 84.664999575 0.4950004250000092 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1105 2.23579750603 0 84.4165005 0.5152492500000108 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1106 3.25488617683 0 84.6055004 0.5544996000000083 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1107 2.14786504266 0 84.86900085 0.290999149999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1108 2.06800584288 0 85.24100055 0.2189994500000097 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1109 3.61386264477 0 84.338999775 0.6315003375000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1110 2.71542297875 0 84.4300004 0.49499940000001175 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1111 2.37819629574 0 84.431499375 0.49275093750000565 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1112 2.35917135957 0 84.67300035 0.4869996500000099 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1113 1.70856970404 0 85.379997 0.0800030000000021 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1114 3.48571779877 0 84.40549945 0.5317508250000174 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1115 2.87421319527 0 85.49099965 -0.0309996500000011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1116 1.95815465425 0 84.98050025 0.1794997499999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1117 2.520476477 0 84.85250035 0.30749965000000545 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1118 3.02145143763 0 84.340000375 0.629999437500004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1119 2.93135224398 0 84.60249935 0.5575006500000029 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1120 2.90250153505 0 85.236000225 0.22399977500000717 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1121 3.04014002515 0 84.53899935 0.6210006500000077 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1122 3.32835645011 0 84.46 0.45000000000001705 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1123 2.08260997416 0 85.3374998 0.12250020000000178 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1124 2.90250153505 0 84.53899995 0.6210000500000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1125 2.25801563131 0 84.70149975 0.4585002500000087 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1126 2.34790180657 0 85.076000025 0.08399997500000611 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1127 3.15731364232 0 84.309000375 0.6764994375000128 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1128 3.13716146983 0 84.2914997 0.7027504500000035 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1129 3.36634957954 0 84.5414993 0.618500700000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1130 2.65671333449 0 85.414500225 0.045499775000007514 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1131 2.25112114639 0 85.206000475 0.25399952500000894 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1132 3.51061454803 0 84.415499925 0.5167501124999987 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1133 2.86305165382 0 84.520499675 0.6395003250000088 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1134 2.06800584288 0 85.2094993 0.25050070000000063 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1135 1.87955651685 0 84.300499575 0.6892506375000025 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1136 2.52912274255 0 84.443 0.4755000000000109 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1137 3.21935410129 0 84.363999875 0.5940001875000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1138 4.98960168004 0 83.727999825 1.548000262500011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1139 3.88013266186 0 84.2445007 0.7732489500000028 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1140 4.29866382416 0 83.736000425 1.5359993625000072 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1141 3.36287839839 0 84.9799994 0.18000060000000817 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1142 4.11336811599 0 84.948000175 0.21199982500000092 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1143 3.13716146983 0 84.58100025 0.5789997500000027 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1144 3.22639831866 0 84.114499475 0.9682507875000042 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1145 4.63728501684 0 84.447499875 0.4687501874999995 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1146 3.52359981675 0 85.3645004 0.09549960000000796 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1147 3.36671753766 0 85.755499775 -0.29549977499999897 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1148 4.60110969497 0 84.51249985 0.6475001500000076 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1149 2.996952631 0 84.0304993 1.094251050000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1150 3.62274145966 0 84.2284994 0.7972509000000016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1151 5.15619113861 0 84.3365008 0.6352488000000136 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1152 3.92713031704 0 85.00750055 0.15249945000000198 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1153 4.92375419354 0 84.06299915 1.0455012750000137 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1154 4.23140792301 0 84.767499175 0.3925008250000076 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1155 3.37064741875 0 84.136499825 0.9352502625000056 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1156 2.90250153505 0 84.498500825 0.6614991750000115 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1157 3.89600156508 0 84.886999325 0.27300067500000014 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1158 4.81915540131 0 84.243999925 0.774000112500012 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1159 3.15056754597 0 83.5110002 1.8734997000000106 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1160 2.89145135544 0 84.800000225 0.359999775 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1161 2.21913447815 0 84.9034996 0.2565004000000016 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1162 4.53042625525 0 84.059999225 1.0500011625000099 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1163 4.8661051796 0 83.75349945 1.5097508249999976 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1164 4.42174155156 0 84.588999025 0.5710009749999984 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1165 2.53372287512 0 85.0210006 0.13899940000001154 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1166 3.89600156508 0 84.07499965 1.0275005250000149 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1167 3.2442204033 0 85.396500425 0.06349957500000586 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1168 3.37064741875 0 83.959000575 1.2014991375000008 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1169 2.26864111687 0 84.7609998 0.3990002000000118 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1170 4.8992150082 0 84.030000525 1.0949992124999994 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1171 5.06758855666 0 84.252499175 0.7612512375000122 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1172 3.3138339442 0 83.9520012 1.2119982000000107 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1173 3.27701048398 0 84.43649945 0.4852508250000085 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1174 5.15619113861 0 84.062499425 1.0462508625000098 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1175 3.48199621824 0 83.47000045 1.9349993250000068 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1176 4.45514111937 0 83.86149985 1.3477502250000057 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1177 3.67738324523 0 85.15999925 7.500000066551493e-07 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1178 2.95195426078 0 84.16750005 0.8887499250000062 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1179 4.63728501684 0 84.36899985 0.586500225000016 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1180 3.79571140468 0 84.061999775 1.0470003375000019 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1181 3.73279362334 0 84.300998875 0.6885016875000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1182 3.06230535434 0 85.63949985 -0.17949985000000196 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1183 3.62274145966 0 84.0994999 0.9907501500000109 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1184 4.05678387799 0 84.872500425 0.2874995750000068 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1185 5.06758855666 0 84.19599995 0.8460000750000063 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1186 3.22639831866 0 84.318999525 0.6615007125000147 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1187 2.73080396045 0 85.858501025 -0.39850102499999024 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1188 5.24794714078 0 84.07850005 1.0222499250000041 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1189 3.66367779072 0 84.929499375 0.23050062499999913 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1190 4.17689583184 0 84.15650035 0.9052494750000051 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1191 4.00665156404 0 84.109999875 0.975000187500008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1192 3.49397738577 0 84.228999825 0.7965002625000039 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1193 3.61386264477 0 83.93299965 1.2405005250000087 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1194 4.78009885505 0 83.9950004 1.1474994000000152 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1195 4.94025067672 0 84.21699965 0.8145005249999997 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1196 4.71136945196 0 83.913500275 1.2697495875000016 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1197 2.77965392715 0 83.822999425 1.4055008624999985 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1198 4.38883904114 0 84.446500575 0.4702491375000051 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1199 3.18148278982 0 85.370500225 0.08949977500000444 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1200 4.70385465798 0 84.15549925 0.9067511249999995 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1201 2.48024552199 0 85.467500325 -0.0075003249999980315 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1202 2.79256263316 0 83.691500675 1.6027489875000072 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1203 5.20078623125 0 82.904999725 2.7825004125000135 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1204 3.53595970915 0 82.8145004 2.9182494000000077 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 4 5 promise swing_level 7 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1205 2.83279967723 0 83.692000175 1.6019997375000017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1206 2.98180151453 0 83.400999475 2.0385007874999985 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1207 3.31339824504 0 84.58200015 0.5779998500000062 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1208 3.24779325302 0 83.4039996 2.034000599999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1209 4.23690511031 0 83.895500925 1.2967486125000178 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1210 4.21269566507 0 83.9574997 1.2037504500000082 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1211 4.31754406538 0 85.1849999 0.2750001000000111 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1212 5.40080120652 0 83.6874994 1.6087509000000182 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1213 4.26756104107 0 84.1684996 0.8872506000000016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1214 4.40789063084 0 83.836500375 1.3852494375000077 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1215 2.90250153505 0 85.452999475 0.00700052500000653 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1216 3.90066133282 0 83.6710012 1.6334981999999982 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1217 4.09568311293 0 83.6889996 1.606500600000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1218 2.32202027362 0 83.543500275 1.8247495875000084 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1219 3.9162009563 0 84.6829998 0.47700020000000054 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 7 5 promise swing_level 6 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1220 4.55072975326 0 85.096499475 0.06350052500000347 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1221 4.98960168004 0 83.7640008 1.4939988 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1222 3.48571779877 0 84.50499995 0.6550000500000067 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1223 3.82984987584 0 83.013000225 2.620499662500009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 3 5 promise swing_level 6 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1224 3.95294609782 0 83.464500725 1.9432489125000174 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1225 3.31720456502 0 83.103500175 2.4847497375000174 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1226 3.97961764859 0 84.113999175 0.9690012375000023 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1227 4.34938240412 0 83.86949975 1.3357503750000035 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1228 3.93186488986 0 83.912500025 1.2712499625000078 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1229 2.80051833724 0 84.850498825 0.3095011750000026 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1230 4.52975999063 0 83.901999775 1.287000337500018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1231 3.29121134217 0 83.7880003 1.457999550000018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1232 4.42774475186 0 83.8934997 1.2997504499999977 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1233 3.84974163103 0 83.78649965 1.4602505250000135 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1234 2.98180151453 0 83.39149895 2.052751575000009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1235 4.95606048827 0 83.6475006 1.6687491000000065 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1236 5.03217204616 0 83.9230005 1.2554992500000068 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1237 4.18282015663 0 83.865999325 1.3410010125000014 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1238 4.79489870881 0 83.8415007 1.3777489500000115 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1239 2.81925910919 0 85.5619991 -0.10199909999998907 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1240 2.06800584288 0 85.200000025 0.2599999750000109 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1241 2.25112114639 0 85.3334991 0.12650090000000774 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1242 2.2528774348 0 84.559500325 0.6004996750000032 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1243 2.20731326885 0 85.2654997 0.19450029999999857 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1244 1.98450862124 0 84.213499675 0.8197504875000163 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1245 3.25488617683 0 84.619999825 0.5400001750000115 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1246 1.97124355876 0 84.7954997 0.36450030000001166 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1247 2.57325940549 0 85.617999425 -0.15799942499998848 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1248 2.50761735877 0 84.6404995 0.5195005000000009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1249 2.09298780883 0 85.132 0.028000000000000136 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1250 2.22395915051 0 85.2194998 0.240500200000011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1251 2.40535258985 0 84.41350025 0.51974962500001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1252 2.00610810282 0 85.483500225 -0.0235002249999951 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1253 3.61386264477 0 84.378500175 0.5722497375000088 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1254 3.61386264477 0 84.35549885 0.6067517250000023 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1255 2.86305165382 0 84.409499925 0.525750112499999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1256 2.69064816356 0 84.656499375 0.5035006250000095 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1257 2.52912274255 0 84.3340002 0.6389996999999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1258 2.48647461628 0 84.448000275 0.46799958750001025 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1259 2.47811701105 0 84.3744995 0.5782507500000094 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1260 2.71542297875 0 84.460999525 0.6990004749999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1261 3.38956467165 0 84.596000075 0.5639999249999988 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1262 3.04014002515 0 84.447499975 0.46875003750000843 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1263 2.87421319527 0 84.63200015 0.527999850000009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1264 2.44524097268 0 84.229999375 0.7950009374999993 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1265 2.81925910919 0 85.5640009 -0.10400089999999124 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1266 2.29313137734 0 85.1404991 0.019500900000005594 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1267 2.71542297875 0 85.543500475 -0.08350047499999674 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1268 2.09742183942 0 85.79849995 -0.33849994999998784 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1269 2.2528774348 0 85.329499975 0.13050002500000063 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1270 2.15883788221 0 85.187499975 0.27250002500001075 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1271 2.20731326885 0 85.132500875 0.027499124999999958 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1272 2.3293565891 0 84.273499475 0.7297507875000164 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1273 3.51061454803 0 84.3639995 0.5940007499999993 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1274 2.520476477 0 84.8505002 0.3094998000000061 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1275 1.9038241963 0 84.779999 0.3800010000000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1276 3.38956467165 0 84.550499525 0.6095004749999987 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1277 2.14786504266 0 84.65950045 0.5004995500000092 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1278 1.72558649193 0 85.040001 0.11999900000000141 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1279 2.61896392029 0 84.386499625 0.5602505625000092 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1280 3.75178902153 0 84.26000005 0.7499999250000045 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1281 2.81925910919 0 84.58749985 0.5725001500000048 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1282 3.36634957954 0 84.516499525 0.6435004750000047 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1283 3.06230535434 0 83.832 1.3920000000000172 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1284 3.63209225801 0 85.42550045 0.034499550000003876 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1285 2.57325940549 0 85.851500975 -0.3915009749999882 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1286 3.75178902153 0 84.207999775 0.8280003375000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1287 3.48571779877 0 84.564999925 0.5950000750000101 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1288 4.20126736815 0 84.1924997 0.8512504500000091 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1289 3.28030657478 0 85.403499775 0.0565002250000049 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1290 5.02441943023 0 84.242999675 0.7755004875000182 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1291 4.29300531776 0 84.7849997 0.37500030000000495 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1292 4.23140792301 0 85.023499825 0.13650017500000333 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1293 3.05626197049 0 84.66499995 0.49500005000001013 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1294 2.88546210335 0 83.666500625 1.6402490625000112 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1295 4.85885544059 0 84.495499775 0.6645002250000062 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1296 2.51407607482 0 83.68499965 1.6125005250000157 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1297 3.63209225801 0 83.568000225 1.787999662499999 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1298 3.52741090964 0 85.570000275 -0.11000027499999304 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1299 2.8168240943 0 85.806501225 -0.3465012250000001 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1300 3.37064741875 0 83.6190004 1.711499400000001 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1301 2.95809675652 0 83.756499825 1.5052502624999988 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1302 3.3138339442 0 84.3659999 0.5910001499999993 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1303 3.62274145966 0 84.19999905 0.8400014250000041 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1304 4.8992150082 0 84.416499575 0.515250637500003 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1305 2.7823322265 0 84.07849965 1.0222505250000111 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1306 4.21929995362 0 84.0539999 1.059000150000017 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1307 2.50569074066 0 85.098000725 0.061999274999999465 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1308 4.94025067672 0 84.530999025 0.6290009750000053 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1309 2.35356305584 0 84.9864991 0.17350090000000196 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1310 3.44136359344 0 85.404999825 0.055000175000000706 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1311 5.06758855666 0 84.538499825 0.6215001750000028 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1312 2.64011518851 0 84.10400025 0.9839996250000098 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1313 2.58912177825 0 84.91949895 0.24050105000000033 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1314 2.95195426078 0 84.015999925 1.1160001125000036 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1315 5.07547506828 0 83.800500825 1.4392487625000072 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1316 3.99579404276 0 83.729001 1.5464985000000127 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1317 2.92871983878 0 84.704999725 0.45500027500001183 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1318 4.06183643479 0 83.56849895 1.787251574999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1319 2.64029120901 0 83.71000045 1.5749993250000145 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1320 4.29300531776 0 84.1159999 0.9660001499999993 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1321 2.95195426078 0 83.826499775 1.4002503375000046 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1322 4.53042625525 0 84.7080003 0.45199970000001033 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1323 4.98197957863 0 84.40399915 0.5340012750000014 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1324 3.79571140468 0 84.614998875 0.5450011250000074 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1325 2.65915985321 0 85.34999965 0.11000035000000424 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1326 3.37064741875 0 84.3649993 0.592501050000017 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1327 4.17689583184 0 84.12200105 0.9569984250000161 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1328 5.15619113861 0 84.27650015 0.7252497750000018 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1329 4.63728501684 0 84.7355007 0.42449930000000224 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1330 2.54900634755 0 83.790999225 1.453501162500018 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1331 4.23690511031 0 83.6294996 1.6957506000000038 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1332 3.40961783577 0 84.027500375 1.098749437500004 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1333 2.79817206231 0 84.89100015 0.2689998500000087 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1334 3.03418372506 0 84.971499875 0.1885001249999988 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1335 2.81925910919 0 83.92849995 1.2472500750000037 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1336 4.8661051796 0 83.95100085 1.213498725000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1337 2.94339750184 0 85.747499675 -0.2874996749999951 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1338 3.18458942266 0 83.47799915 1.9230012750000043 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1339 3.69537408729 0 83.8855 1.3117500000000177 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1340 4.98960168004 0 83.982999525 1.1655007125000125 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1341 3.40559661135 0 85.217000375 0.24299962500000732 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1342 3.52741090964 0 85.022499625 0.13750037500001044 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1343 3.37064741875 0 85.420500025 0.039499975000009624 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1344 1.81816529668 0 84.532000175 0.627999825000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1345 3.56620961403 0 83.51199985 1.872000225000015 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1346 2.2494406043 0 85.57499995 -0.11499995000000068 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1347 3.83520142642 0 84.558000875 0.6019991250000004 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1348 3.17437865535 0 83.872500175 1.3312497375000092 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1349 3.54863373783 0 85.709999775 -0.24999977499999487 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1350 3.3549420805 0 84.014000875 1.1189986875000173 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1351 3.18148278982 0 84.84999985 0.3100001500000019 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1352 4.15978483669 0 84.3344996 0.6382506000000063 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1353 3.54863373783 0 83.9290011 1.2464983500000173 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1354 3.34732604321 0 83.0205 2.60925000000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1355 3.46931522498 0 84.52299915 0.6370008500000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1356 2.53782853275 0 83.95850035 1.2022494750000163 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1357 4.2552455881 0 84.7270005 0.43299950000000254 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 5 5 promise swing_level 6 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1358 5.45071234571 0 83.55850015 1.802249775000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1359 4.03958106713 0 83.344000625 2.1239990624999976 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1360 2.85750332813 0 83.290500225 2.204249662500004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1361 2.77422947385 0 83.747999425 1.5180008625000028 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1362 4.74857429339 0 83.670500575 1.6342491374999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1363 4.20069439548 0 83.745500575 1.5217491375000165 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1364 4.63658695371 0 83.4800001 1.9199998500000106 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1365 4.38821376777 0 83.62699875 1.69950187500001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1366 3.81982297414 0 82.977500375 2.6737494374999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1367 4.23690511031 0 83.86299955 1.345500675000011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1368 2.71542297875 0 83.72449995 1.5532500750000153 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1369 2.77678606216 0 83.733000325 1.5404995124999985 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1370 2.32202027362 0 84.432000225 0.4919996625000138 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1371 2.12768675694 0 83.7459999 1.521000150000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1372 4.75623276948 0 82.9179996 2.763000600000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1373 4.09568311293 0 83.8785009 1.3222486499999988 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1374 2.88828810173 0 83.65300025 1.660499625 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1375 4.02854458864 0 83.9105015 1.274247750000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1376 3.8247770812 0 83.755000275 1.507499587500007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1377 2.75345263332 0 83.42750075 1.9987488750000182 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1378 2.78728389469 0 84.53850085 0.6214991499999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1379 4.53672840544 0 83.794000075 1.4489998875000083 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1380 2.86589315985 0 84.18250045 0.8662493249999983 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1381 4.38821376777 0 83.12999975 2.4450003750000135 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1382 3.23709823473 0 83.0440008 2.5739987999999983 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1383 3.51061454803 0 83.959999875 1.2000001875000166 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1384 3.15738556461 0 83.069499225 2.5357511625000058 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1385 2.27196931651 0 82.832000175 2.891999737500001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1386 2.66632133899 0 83.85299985 1.3605002250000027 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1387 4.16509738546 0 85.040499675 0.11950032499999852 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 5 5 promise swing_level 6 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1388 5.20078623125 0 82.975999675 2.676000487500012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1389 3.0527280537 0 85.170500525 0.2894994750000109 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1390 3.90066133282 0 83.36350095 2.094748575000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1391 3.40522010623 0 84.39700035 0.5444994750000092 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1392 3.77580687597 0 84.02500015 1.1024997750000125 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1393 4.09568311293 0 84.999999975 0.16000002500001076 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1394 4.13009860961 0 83.86299945 1.345500825000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1395 4.03405527942 0 82.968000025 2.6879999625000153 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1396 4.17689583184 0 83.251499825 2.2627502625000133 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1397 3.8247770812 0 84.375999825 0.5760002625000169 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1398 4.59325717765 0 83.679000575 1.6214991375000025 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1399 5.1018164655 0 83.734499825 1.5382502625000072 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1400 2.67599909044 0 83.661499825 1.6477502624999971 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1401 3.36634957954 0 84.663999125 0.49600087500000145 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1402 2.15414066881 0 82.9609996 2.698500600000017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1403 2.65915985321 0 83.14099995 2.4285000750000165 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1404 3.35104871505 0 84.30000015 0.689999775000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1405 2.14786504266 0 84.893499275 0.26650072500000876 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1406 3.51061454803 0 84.51449985 0.645500149999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1407 1.95815465425 0 85.7240007 -0.2640006999999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1408 2.75602585333 0 85.274500275 0.18549972500001105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1409 2.57325940549 0 84.86099985 0.29900015000000624 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1410 2.22395915051 0 85.109500175 0.05049982500001138 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1411 2.87421319527 0 85.44650065 0.013499350000000743 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1412 2.08260997416 0 85.516 -0.056000000000000216 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1413 1.97124355876 0 84.79399855 0.3660014500000074 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1414 2.57325940549 0 85.588500725 -0.12850072499999782 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1415 2.64243046275 0 85.421999775 0.03800022500000183 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1416 2.21727076111 0 84.684499525 0.47550047499999837 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1417 2.18607703807 0 84.932499675 0.2275003250000026 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1418 2.52912274255 0 84.3734993 0.5797510499999987 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1419 2.36674469012 0 84.5410002 0.618999800000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1420 3.0527280537 0 84.729000675 0.4309993250000105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1421 2.90250153505 0 85.478500225 -0.018500224999999648 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1422 3.36634957954 0 84.5564997 0.6035003000000018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1423 2.79256263316 0 85.561000025 -0.10100002499999333 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1424 2.09298780883 0 84.5274999 0.63250010000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1425 2.48647461628 0 84.285500375 0.7117494375000106 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1426 3.04014002515 0 84.543999975 0.6160000249999996 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1427 1.70856970404 0 85.779999 -0.31999899999999853 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1428 2.90250153505 0 85.56850055 -0.10850054999999087 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1429 3.61386264477 0 84.393499375 0.5497509375000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1430 2.57325940549 0 84.736500275 0.42349972500000777 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1431 3.32835645011 0 84.31750025 0.6637496250000154 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1432 2.73050807996 0 84.782500225 0.3774997749999983 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1433 2.520476477 0 84.8880012 0.2719988000000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1434 2.47811701105 0 84.3615003 0.5977495500000032 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1435 2.64243046275 0 85.653000575 -0.19300057499998785 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1436 2.57325940549 0 85.5369999 -0.07699989999999274 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1437 2.52912274255 0 84.65049975 0.5095002500000106 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1438 2.65671333449 0 85.54450035 -0.08450035000000183 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1439 2.42513277215 0 84.3444992 0.6232512000000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1440 2.71542297875 0 85.47249925 -0.012499249999993356 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1441 1.93249146701 0 84.870000275 0.2899997250000098 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1442 2.25801563131 0 84.46749945 0.6925005499999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1443 2.66632133899 0 85.7044995 -0.24449949999999204 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1444 2.40535258985 0 84.415500475 0.516749287500005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1445 2.79256263316 0 85.395500225 0.06449977499999876 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1446 3.32835645011 0 84.4009996 0.538500599999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1447 3.11726491618 0 84.439500675 0.48074898750000017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1448 2.96673931882 0 84.50599935 0.654000650000009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1449 3.21935410129 0 84.388998625 0.5565020625000088 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1450 2.79256263316 0 84.655999925 0.5040000750000019 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1451 5.03217204616 0 83.61450005 1.718249925000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1452 4.30554062944 0 84.4665009 0.6934991000000054 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1453 2.77703641729 0 84.832000325 0.3279996750000095 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1454 2.95787581321 0 85.771999525 -0.3119995249999931 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1455 4.48904909382 0 83.915499525 1.2667507125000057 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1456 4.68074200425 0 83.47350045 1.929749325000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1457 4.71136945196 0 84.43299955 0.49050067499999983 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1458 4.45514111937 0 83.878499675 1.322250487500007 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1459 2.95787581321 0 85.58850055 -0.1285005500000011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1460 4.07305633168 0 83.997499325 1.1437510124999974 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1461 3.04014002515 0 84.52049955 0.6395004500000084 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1462 2.56430926229 0 84.332500825 0.6412487625000125 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1463 3.89600156508 0 84.120999875 0.9585001875000145 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1464 4.39538031203 0 84.456999725 0.4545004125000034 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1465 4.24911447842 0 83.8260002 1.400999700000014 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1466 3.40961783577 0 84.109000375 0.9764994375000171 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1467 3.16076961237 0 85.749999825 -0.28999982499999816 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1468 4.45514111937 0 84.654498875 0.5055011250000035 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1469 3.62274145966 0 84.223499725 0.8047504125000131 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1470 4.35642257412 0 84.718999525 0.44100047500000417 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1471 2.80851950068 0 83.73500055 1.5374991750000149 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1472 3.04014002515 0 84.6999993 0.46000070000000337 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1473 3.19188638661 0 84.054999725 1.057500412500005 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1474 2.09164547147 0 85.118001175 0.04199882500000174 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1475 2.37819629574 0 83.572999775 1.7805003375000084 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1476 3.19188638661 0 85.14950045 0.010499550000000135 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1477 2.32576069415 0 85.365999575 0.09400042500000155 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1478 4.63728501684 0 84.182000175 0.8669997375000094 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1479 3.80503735759 0 84.14800005 0.9179999250000179 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1480 4.45514111937 0 84.49699995 0.6630000500000023 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1481 3.06572586948 0 84.7584995 0.4015005000000059 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1482 4.55843741409 0 83.88599915 1.3110012750000024 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1483 4.33659088676 0 83.991000825 1.153498762500007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1484 4.8992150082 0 84.44749955 0.46875067500000256 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1485 4.67403368929 0 84.212499625 0.8212505624999977 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1486 5.15619113861 0 84.23949985 0.7807502250000056 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1487 4.85885544059 0 84.102498825 0.9862517625000109 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1488 4.03958106713 0 83.91450035 1.2682494750000117 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1489 3.08174415116 0 84.2915001 0.7027498500000178 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1490 3.03083319426 0 85.490499575 -0.03049957499999606 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1491 3.37413466519 0 85.1354994 0.024500600000004646 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1492 3.35149437292 0 84.020998975 1.1085015375000111 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1493 3.57011348802 0 84.199500475 0.8407492875000173 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1494 4.60761017185 0 83.671999325 1.6320010125000053 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1495 3.46931522498 0 84.2044989 0.8332516500000011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1496 4.18876131089 0 84.02900055 1.0964991749999982 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1497 3.55290898553 0 84.258499 0.752251500000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1498 3.17089287752 0 84.838000275 0.3219997250000063 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1499 3.35104871505 0 84.07699945 1.024500825000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1500 2.48024552199 0 85.013499775 0.14650022500000548 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1501 4.749306487 0 83.945500175 1.221749737499998 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1502 3.06572586948 0 84.238999375 0.7815009374999988 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1503 1.82384732846 0 86.0984997 -0.6384996999999999 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1504 5.1643560615 0 83.596000275 1.745999587500016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1505 3.15731364232 0 85.310000375 0.1499996250000038 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1506 3.7139896045 0 84.292499775 0.7012503375000136 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1507 5.24794714078 0 84.065999225 1.0410011625000095 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1508 4.78711530883 0 83.602000225 1.7369996625000113 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1509 3.44499877858 0 84.628500675 0.5314993250000072 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1510 5.20166453319 0 84.00300095 1.1354985750000068 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1511 3.09112547159 0 85.83099975 -0.3709997499999986 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1512 4.44842090138 0 84.164500075 0.8932498874999979 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1513 3.87043543601 0 84.331499275 0.6427510875000095 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1514 3.07180677592 0 83.705500425 1.5817493625000125 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1515 3.12725210162 0 83.8950006 1.2974991000000031 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1516 4.68074200425 0 83.5494997 1.8157504500000101 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1517 4.67332451942 0 83.674999825 1.627500262500007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1518 4.8661051796 0 83.5669996 1.7895006000000038 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1519 4.11856269266 0 83.9880001 1.1579998500000173 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1520 5.50155459048 0 83.54749985 1.8187502250000165 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1521 4.87414777473 0 83.681000175 1.6184997375000165 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1522 4.46799443574 0 83.5689989 1.7865016500000124 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1523 3.97961764859 0 83.991001 1.153498500000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1524 2.87421319527 0 85.456000925 0.003999075000007235 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1525 4.33022330151 0 84.01749975 1.1137503750000093 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1526 4.36224960626 0 84.09600055 0.9959991750000086 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1527 4.42110686856 0 82.82649955 2.9002506750000165 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 6 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1528 4.55072975326 0 84.85199985 0.3080001500000066 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 5 promise swing_level 6 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1529 3.61386264477 0 84.846000625 0.3139993750000031 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 7 5 promise swing_level 7 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1530 4.23690511031 0 83.9835007 1.1647489500000177 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1531 3.6587939114 0 83.065500075 2.5417498875000035 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 3 5 promise swing_level 6 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1532 2.43921651142 0 84.0864992 1.010251199999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1533 2.77945883512 0 84.957500475 0.2024995250000018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 7 5 promise swing_level 6 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1534 4.00665156404 0 84.114000825 0.9689987625 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1535 5.60613859814 0 83.308 2.1779999999999973 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1536 2.86589315985 0 83.48950085 1.9057487250000094 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1537 4.44777853689 0 84.82299955 0.33700044999999934 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 4 5 promise swing_level 6 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1538 5.45071234571 0 83.319500375 2.1607494375000016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1539 2.46568541903 0 84.11799985 0.9630002250000018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1540 4.57189457086 0 83.727999675 1.5480004874999977 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1541 2.50340530846 0 85.493999925 -0.03399992499999199 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1542 4.71064890814 0 83.237999925 2.2830001125000123 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1543 5.60613859814 0 83.24800015 2.267999775000014 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1544 3.40522010623 0 84.3720006 0.5819990999999973 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1545 4.6004224797 0 83.61300065 1.7204990250000023 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1546 4.82628706711 0 83.58050005 1.769249925000011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1547 4.98960168004 0 83.851999775 1.3620003375000138 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1548 4.0900028821 0 84.062500975 1.0462485374999986 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1549 4.48157410599 0 82.94499975 2.722500375000017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 4 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1550 4.29866382416 0 83.38900035 2.0564994750000025 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1551 3.28388159021 0 84.4439991 0.47400135000000887 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1552 3.44499877858 0 84.365500175 0.5917497375000167 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1553 2.40535258985 0 84.689001 0.4709990000000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1554 3.04334411031 0 83.044500375 2.57324943750001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1555 3.98499520747 0 83.183999775 2.364000337500002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1556 2.81925910919 0 83.18050005 2.3692499249999983 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1557 5.1643560615 0 83.18350065 2.364749025000009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1558 3.8397168061 0 84.010000775 1.1249988375000157 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1559 3.40522010623 0 83.8460004 1.3709994000000165 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1560 5.11065779663 0 82.902000325 2.786999512500003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 4 5 promise swing_level 6 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1561 4.11856269266 0 83.042499125 2.5762513124999984 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 7 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1562 2.91691693472 0 83.645999325 1.6710010124999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1563 4.71064890814 0 83.300500225 2.1892496625000177 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1564 4.51588718524 0 84.844500175 0.31549982500001195 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 4 5 promise swing_level 6 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1565 4.42774475186 0 83.597499875 1.7437501875000123 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1566 2.28783231137 0 83.186499825 2.36025026250001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1567 4.06183643479 0 83.9924988 1.1512517999999972 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1568 4.68074200425 0 83.5995012 1.7407481999999987 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1569 4.68074200425 0 83.36649875 2.090251875 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf1570 3.09112547159 0 85.651999625 -0.19199962499999684 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_promise_confs_batch220_single.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_promise_confs_batch220_single.txt index c2f901cce9..cb23d8702f 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_promise_confs_batch220_single.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_promise_confs_batch220_single.txt @@ -1,8822 +1,8822 @@ +++++ conf1 1 0 84.76 0 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 +1 gpu conv fp32 1 add fp32 1 tanh fp32 1 +2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +3 gpu conv fp32 1 add fp32 1 tanh fp32 1 +4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +5 gpu conv fp32 1 add fp32 1 tanh fp32 1 +6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +7 gpu mul fp32 1 add fp32 1 8 gpu softmax fp32 1 ----- +++++ conf1 2.2528774348 0 84.934166125 0.22583387500000074 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf2 2.64243046275 0 84.865833075 0.2941669250000075 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf3 2.66632133899 0 84.658333775 0.5016662250000025 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf4 2.47811701105 0 84.32499935 0.6525009750000095 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf5 2.71542297875 0 84.612499575 0.5475004250000041 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf6 2.48647461628 0 84.369999825 0.5850002625000172 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf7 2.18607703807 0 84.913332675 0.24666732499999855 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf8 2.520476477 0 84.761666675 0.3983333250000044 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf9 2.14786504266 0 84.611666325 0.5483336750000035 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf10 2.08260997416 0 85.52083355 -0.06083355000000096 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf11 3.26208692053 0 84.43416695 0.48874957499999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf12 1.93249146701 0 85.0224993 0.13750069999999825 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf13 2.37819629574 0 84.301667075 0.6874993875000115 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf14 2.59591050603 0 84.633333975 0.5266660250000058 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf15 3.36634957954 0 84.3450008 0.6224988000000167 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf16 2.21727076111 0 84.541666225 0.618333775000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf17 2.00610810282 0 85.44833395 0.011666049999999484 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf18 2.69064816356 0 84.45250075 0.46124887500000966 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf19 2.46568541903 0 84.61000045 0.549999550000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf20 3.17089287752 0 84.54416695 0.6158330500000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf21 2.23579750603 0 84.8641672 0.295832800000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf22 3.13716146983 0 84.5308332 0.6291668000000016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf23 2.08260997416 0 85.285833225 0.1741667750000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf24 2.48647461628 0 84.761667075 0.3983329249999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf25 2.15883788221 0 84.53166635 0.6283336500000104 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf26 3.35104871505 0 84.274166675 0.7287499875000023 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf27 2.35917135957 0 84.475000275 0.6849997250000058 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf28 2.87421319527 0 85.28833285 0.17166715000000182 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf29 2.29313137734 0 85.28333225 0.1766677500000043 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf30 2.2528774348 0 84.905833825 0.25416617500000316 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf31 3.04014002515 0 84.4699985 0.6900015000000025 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf32 2.71542297875 0 84.4300005 0.49499924999999934 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf33 2.36674469012 0 85.01249975 0.14750025000000166 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf34 3.15731364232 0 84.24083425 0.7787486249999986 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf35 2.20731326885 0 84.82416635 0.3358336500000064 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf36 2.66632133899 0 84.546667125 0.6133328750000061 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf37 3.25488617683 0 84.4799997 0.6800003000000118 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf38 2.18607703807 0 85.082500325 0.0774996750000071 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf39 3.21935410129 0 84.4591669 0.4512496500000083 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf40 2.81925910919 0 85.035832975 0.12416702499999988 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf41 2.22395915051 0 84.682500925 0.47749907500000577 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf42 2.93135224398 0 85.0933329 0.06666710000001219 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf43 2.09742183942 0 85.404999575 0.055000425000000075 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf44 2.64243046275 0 84.740832575 0.41916742500000626 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf45 2.90250153505 0 84.2641668 0.7437498000000105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf46 3.48571779877 0 84.3499996 0.6150006000000019 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf47 2.64243046275 0 85.1216655 0.03833449999999916 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf48 2.03940354341 0 84.938333375 0.221666625000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf49 2.47811701105 0 84.36416765 0.5937485250000094 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf50 2.65671333449 0 85.232499325 0.22750067499999888 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf51 2.81925910919 0 84.83916685 0.3208331500000071 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf52 2.73050807996 0 85.11000015 0.0499998500000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf53 2.22395915051 0 85.0250004 0.134999600000009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf54 2.46568541903 0 84.3908332 0.5537502000000032 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf55 1.79160865678 0 85.5 -0.03999999999999487 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf56 2.2528774348 0 84.78166645 0.37833355000000213 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf57 2.32202027362 0 84.4308331 0.4937503500000062 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf58 2.31110203954 0 84.2150001 0.8174998500000115 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf59 2.35917135957 0 84.502500525 0.657499475000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf60 2.66632133899 0 85.018332425 0.14166757500000815 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf61 2.36674469012 0 84.307500125 0.6787498125000013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf62 1.83623037965 0 84.6108326 0.5491674000000103 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf63 2.18607703807 0 84.27083445 0.7337483250000147 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf64 2.93135224398 0 84.974166625 0.18583337500000996 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf65 2.59591050603 0 84.765834025 0.3941659750000014 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf66 2.25801563131 0 84.43833315 0.4825002749999996 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf67 2.37819629574 0 84.24416645 0.773750325000016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf68 2.57325940549 0 84.70416665 0.45583335000000036 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf69 2.87421319527 0 84.526665275 0.633334725000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf70 2.23579750603 0 84.715833025 0.44416697500001023 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf71 2.09742183942 0 85.1474996 0.012500400000001854 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf72 2.09742183942 0 84.730833175 0.42916682500000436 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf73 2.3293565891 0 84.744999675 0.4150003250000026 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf74 1.93249146701 0 84.477499425 0.6825005750000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf75 2.61896392029 0 84.756666625 0.4033333750000111 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf76 2.18607703807 0 84.668333825 0.4916661750000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf77 2.00610810282 0 84.955833075 0.2041669250000041 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf78 2.90250153505 0 85.110833625 0.0491663750000072 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf79 2.81925910919 0 84.373333525 0.5799997124999976 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf80 2.22395915051 0 84.788333875 0.37166612499999874 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf81 2.42513277215 0 84.4791674 0.6808326000000108 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf82 2.69064816356 0 84.63916665 0.5208333499999981 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf83 2.75602585333 0 84.75333345 0.4066665500000056 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf84 2.90250153505 0 85.141666525 0.018333475000000043 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf85 2.50761735877 0 84.514999 0.645001000000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf86 2.65671333449 0 84.705833475 0.45416652499999943 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf87 2.73050807996 0 84.62583325 0.5341667500000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf88 1.97124355876 0 84.4249988 0.5025018000000117 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf89 2.90250153505 0 84.8575001 0.30249990000000937 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf90 2.75602585333 0 84.3941676 0.5487486000000033 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf91 2.520476477 0 85.418333225 0.04166677500000732 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf92 2.31110203954 0 84.375833875 0.5762491875000109 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf93 2.75602585333 0 85.0075001 0.15249990000000369 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf94 3.21935410129 0 84.358333425 0.6024998625000109 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf95 2.39753056999 0 84.707500725 0.4524992750000024 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf96 2.36674469012 0 84.974999525 0.18500047500000394 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf97 2.79256263316 0 85.303333425 0.15666657499999986 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf98 3.36634957954 0 84.39666595 0.5450010750000089 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf99 2.40535258985 0 84.7100002 0.44999980000000905 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf100 2.44524097268 0 84.6149996 0.5450004000000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf101 2.15883788221 0 84.639999725 0.5200002750000096 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf102 2.73050807996 0 85.433333075 0.026666925000012054 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf103 2.73050807996 0 84.5925011 0.5674988999999983 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf104 1.70856970404 0 85.433327 0.026672999999999558 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf105 2.73050807996 0 84.847499875 0.31250012500000823 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf106 2.23579750603 0 84.71749915 0.4425008500000104 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf107 2.66632133899 0 84.71916585 0.44083415000000914 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf108 2.42513277215 0 84.28750065 0.7087490250000101 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf109 2.75602585333 0 85.206666825 0.2533331750000059 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf110 5.11150591832 0 83.79833355 1.4424996750000147 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf111 4.48904909382 0 84.320000225 0.659999662500006 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf112 2.65915985321 0 84.07833405 1.022498925000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf113 2.55546827703 0 83.699167225 1.591249162500013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf114 2.70545850471 0 85.671666075 -0.2116660749999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf115 3.39002063361 0 85.46 1.1379786002407855e-14 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf116 3.92713031704 0 84.6025005 0.5574995000000001 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf117 3.44499877858 0 85.18999945 0.27000055000000317 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf118 1.82049363128 0 84.433334 0.4899990000000045 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf119 3.55760541076 0 85.04583425 0.1141657500000065 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf120 2.37454984713 0 85.641666 -0.1816659999999956 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf121 4.09568311293 0 83.7225 1.5562500000000128 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf122 4.81915540131 0 84.38833355 0.5574996750000096 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf123 2.92269861556 0 84.120833175 0.9587502375000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf124 2.9489396246 0 83.763333125 1.4950003125000038 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf125 4.11336811599 0 83.815833275 1.4162500875000035 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf126 5.24794714078 0 83.785832925 1.4612506125000166 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf127 3.02145143763 0 84.1524996 0.9112506000000096 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf128 4.68889675944 0 84.0608331 1.048750350000013 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf129 3.11726491618 0 84.2183331 0.8125003500000147 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf130 3.42944293235 0 84.7625002 0.39749979999999996 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf131 2.86589315985 0 85.57166635 -0.11166634999999586 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf132 1.9596080833 0 84.298332425 0.6925013625000105 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf133 3.21617930472 0 84.1633334 0.8949999000000091 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf134 4.94025067672 0 84.1550001 0.9074998500000149 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf135 3.06572586948 0 84.4866671 0.6733328999999998 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf136 4.00665156404 0 84.274166525 0.7287502125000103 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf137 1.82049363128 0 84.166664 0.8900040000000118 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf138 3.08174415116 0 84.457499125 0.45375131250001033 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf139 3.65459743556 0 84.9066661 0.25333390000000977 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf140 2.14786504266 0 84.365833225 0.5912501625000033 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf141 2.28268924961 0 84.1691664 0.8862504000000158 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf142 3.52359981675 0 85.0375003 0.12249970000000021 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf143 1.81032878247 0 83.433342 1.9899870000000135 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf144 4.33022330151 0 84.0491668 1.0662498000000156 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf145 3.29121134217 0 83.716666775 1.564999837500018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf146 2.95195426078 0 85.093332675 0.06666732500000594 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf147 3.0750779935 0 85.936666825 -0.4766668249999981 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf148 3.74747795825 0 85.196666675 0.2633333250000021 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf149 2.65915985321 0 84.4224995 0.5062507500000066 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf150 3.1142881864 0 85.5100001 -0.05000009999999405 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf151 1.9596080833 0 85.2125008 0.2474992000000043 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf152 3.12425627658 0 84.286666275 0.710000587500005 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf153 3.38956467165 0 84.857499125 0.3025008750000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf154 3.42944293235 0 83.970834125 1.1837488125000135 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf155 4.78009885505 0 83.987500375 1.1587494375000134 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf156 3.06572586948 0 84.009167425 1.126248862500006 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf157 3.25488617683 0 85.202499925 0.2575000750000072 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf158 3.39383892936 0 84.92416725 0.2358327500000087 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf159 2.71316395691 0 84.80000005 0.35999995000001095 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf160 4.42174155156 0 83.748334025 1.5174989624999995 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf161 3.65459743556 0 85.078332675 0.08166732500000651 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf162 2.8716823693 0 84.841666425 0.3183335750000055 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf163 5.03217204616 0 83.8383343 1.3824985500000082 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf164 1.58558171041 0 85.099998 0.06000200000000577 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf165 4.68074200425 0 83.841666625 1.3775000625000047 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf166 3.1883620737 0 84.814999575 0.3450004250000035 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf167 4.57189457086 0 83.503334175 1.8849987374999984 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf168 4.8992150082 0 83.6158339 1.71624915000001 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf169 5.06758855666 0 83.938332725 1.232500912500015 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf170 3.70045496902 0 85.094167475 0.06583252499999903 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf171 4.85009935161 0 83.74416815 1.5237477750000181 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf172 1.88313156795 0 84.26667 0.7399950000000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf173 2.32388897879 0 86.20500055 -0.745000549999989 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf174 2.70545850471 0 85.20833245 0.2516675500000048 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf175 3.40522010623 0 84.827500525 0.3324994750000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf176 4.11856269266 0 83.90916675 1.276249875000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf177 3.34760900076 0 84.98499845 0.17500154999999895 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf178 4.39538031203 0 84.335832975 0.636250537500004 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf179 1.91380012618 0 85.55583345 -0.09583344999998927 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf180 4.42174155156 0 83.86583365 1.3412495250000092 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf181 5.15619113861 0 83.95916635 1.201250475000002 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf182 3.20891955001 0 85.7524994 -0.2924993999999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf183 4.1422563221 0 84.29833355 0.6924996750000147 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf184 2.42917048367 0 84.279999875 0.7200001875000055 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf185 2.34066577715 0 84.152500325 0.9112495124999995 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf186 5.11150591832 0 83.97500015 1.1774997750000082 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf187 3.06891619039 0 84.6966661 0.4633339000000035 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf188 3.11726491618 0 85.584167775 -0.12416777499999226 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf189 2.40366321762 0 84.94250095 0.21749905000000924 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf190 3.10452256642 0 85.5149996 -0.05499959999999077 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf191 3.27701048398 0 84.047500025 1.0687499625 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf192 5.11150591832 0 84.306666725 0.6799999125000085 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf193 4.49523657385 0 83.747500025 1.5187499625000171 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf194 4.0126251583 0 84.09333305 1.0000004250000103 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf195 3.39354810299 0 85.572498425 -0.11249842500000112 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf196 4.68889675944 0 84.13833325 0.9325001250000042 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf197 2.23931936018 0 84.62500045 0.5349995500000034 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf198 2.86615984136 0 84.4133332 0.5200002000000126 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf199 3.74747795825 0 83.530000675 1.8449989875000128 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf200 5.20166453319 0 83.9658329 1.1912506500000148 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf201 3.19188638661 0 84.4616671 0.6983329000000055 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf202 2.32402535693 0 85.540833225 -0.08083322499999496 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf203 3.27302525995 0 85.705001075 -0.2450010749999933 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf204 3.85022285799 0 84.412500525 0.5212492125000097 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf205 2.53594802291 0 84.8191675 0.3408324999999991 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf206 2.32202027362 0 85.30666595 0.15333405000000938 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf207 4.46188167251 0 84.245833625 0.7712495625000031 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf208 3.05626197049 0 84.35000015 0.6149997750000082 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf209 2.54246041813 0 85.59333365 -0.13333365000000014 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf210 2.36674469012 0 85.26166675 0.19833325000000174 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf211 4.85885544059 0 84.03500025 1.0874996250000137 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf212 3.01527286358 0 85.25166645 0.20833355000000325 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf213 3.77626979527 0 84.515833225 0.6441667750000107 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf214 2.48024552199 0 84.81833225 0.3416677500000077 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf215 3.78064739543 0 83.686667275 1.6099990875000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf216 4.0017352815 0 84.64333385 0.5166661500000004 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf217 3.30967967793 0 85.17083325 0.28916675000000397 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf218 4.1480827235 0 84.016667025 1.114999462500002 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf219 3.89600156508 0 84.155833625 0.9062495625000082 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf220 3.40559661135 0 85.3216663 0.13833370000000117 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf221 3.07821945478 0 83.707499875 1.5787501875000132 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf222 3.52741090964 0 84.353332275 0.6100015875000082 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf223 2.36674469012 0 85.527500125 -0.06750012499999797 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf224 2.1110784986 0 85.359999775 0.10000022499999944 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf225 4.21929995362 0 84.450000275 0.46499958750001724 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf226 2.64029120901 0 84.650000775 0.5099992250000099 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf227 4.60110969497 0 84.265832875 0.7412506875000062 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf228 5.1195298246 0 83.73333345 1.5399998250000024 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf229 2.57325940549 0 85.131667 0.028333000000012043 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf230 3.80503735759 0 83.965000175 1.1924997375000075 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf231 4.78009885505 0 83.742500525 1.5262492125000122 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf232 2.79017349628 0 85.048331775 0.11166822500001106 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf233 2.85479601915 0 84.0425001 1.0762498500000106 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf234 2.45162045063 0 84.321666925 0.6574996125000041 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf235 3.51061454803 0 83.744166775 1.523749837500013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf236 2.46382267142 0 85.251666475 0.20833352500001184 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf237 3.40522010623 0 84.095 0.9975000000000094 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf238 4.48904909382 0 84.18750025 0.8587496250000086 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf239 3.50267448344 0 83.673333375 1.6299999375000098 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf240 3.89600156508 0 84.308333675 0.6774994875000075 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf241 4.42774475186 0 83.916665825 1.2650012625000144 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf242 4.51654937482 0 83.63583415 1.686248775000017 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf243 3.42944293235 0 83.6958342 1.596248700000018 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf244 3.57011348802 0 85.546666925 -0.08666692499999157 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf245 4.11336811599 0 84.458333575 0.45249963750001143 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf246 4.1715531621 0 84.187499775 0.8587503374999983 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf247 4.55843741409 0 83.9508335 1.2137497500000052 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf248 2.54900634755 0 83.861666275 1.3475005875000008 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf249 2.09298780883 0 83.450833675 1.96374948750001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf250 3.27701048398 0 83.801666175 1.4375007375000166 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf251 2.51832413986 0 84.13666705 0.934999425000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf252 3.16448600486 0 83.7658337 1.491249450000005 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf253 3.32835645011 0 83.675832875 1.6262506875000113 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf254 2.33141358263 0 83.9124993 1.2712510500000178 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf255 4.27374553867 0 83.818334725 1.4124979125000081 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf256 5.20166453319 0 84.0674998 1.038750300000018 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf257 4.23140792301 0 84.044167075 1.0737493875000013 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf258 1.92507021073 0 84.9625009 0.19749910000001025 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf259 3.65459743556 0 84.96083255 0.1991674499999988 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf260 2.13391055752 0 85.82416905 -0.3641690499999896 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf261 3.2442204033 0 85.1583328 0.0016672000000085285 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf262 2.59819756789 0 83.854999825 1.357500262500018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf263 2.49704621765 0 85.134999525 0.02500047500000735 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf264 2.34066577715 0 85.0483337 0.11166630000000455 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf265 3.38956467165 0 84.955000075 0.2049999250000042 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf266 2.74581611742 0 84.07250065 1.0312490250000153 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf267 4.94025067672 0 83.68916795 1.6062480750000105 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf268 3.26966664615 0 85.507500275 -0.04750027499999304 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf269 3.19188638661 0 85.40666595 0.053334050000000854 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf270 2.36674469012 0 85.0666672 0.09333280000000743 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf271 3.00947885334 0 84.567500175 0.5924998249999988 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf272 3.08174415116 0 84.0216675 1.1074987499999978 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf273 2.58475221483 0 83.989166175 1.1562507375000166 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf274 3.8653624219 0 84.400832725 0.5387509125000065 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf275 3.70045496902 0 84.84583205 0.31416795000000663 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf276 3.25488617683 0 85.282499875 0.17750012500000595 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf277 4.29866382416 0 83.777499575 1.473750637500018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf278 2.28075482883 0 85.216665825 0.24333417499999827 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf279 4.46188167251 0 84.554166075 0.6058339250000074 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf280 2.59384588291 0 83.8424995 1.376250750000004 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf281 3.57011348802 0 85.430833625 0.0291663749999998 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf282 4.749306487 0 84.22583335 0.8012499750000046 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf283 2.13553800524 0 85.10166645 0.05833355000000895 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf284 2.46382267142 0 83.962499225 1.1962511625000047 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf285 4.78009885505 0 84.114167425 0.9687488625 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf286 3.20576529027 0 84.00249995 1.1362500750000066 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf287 3.67738324523 0 85.02916775 0.13083225000000598 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf288 3.86487740325 0 84.034167125 1.0887493125000134 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf289 3.370196653 0 83.727500375 1.5487494374999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf290 3.00947885334 0 84.167499375 0.8887509374999993 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf291 3.21617930472 0 85.604167575 -0.14416757500000016 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf292 2.68330363357 0 83.835833525 1.3862497125000104 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf293 2.2528774348 0 84.1324997 0.9412504500000125 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf294 2.56208144611 0 83.70083425 1.588748625000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf295 4.23690511031 0 84.03583255 1.0862511750000152 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf296 3.37064741875 0 83.596665525 1.7450017124999988 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf297 3.04987324632 0 84.694167575 0.46583242500001065 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf298 4.1715531621 0 84.387501175 0.5587482375000121 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf299 2.47192966909 0 84.504999275 0.6550007250000022 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf300 5.07547506828 0 83.71083285 1.5737507250000036 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf301 3.04987324632 0 84.300833725 0.6887494125000018 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf302 3.44136359344 0 85.140833475 0.01916652500001137 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf303 2.35356305584 0 84.982499375 0.17750062500000185 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf304 3.31339824504 0 85.795832875 -0.335832874999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf305 3.39354810299 0 84.9266668 0.23333319999999846 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf306 2.90250153505 0 84.3316671 0.6424993500000014 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf307 2.58458352145 0 84.3508339 0.613749150000011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf308 3.07218125113 0 85.380832525 0.07916747500000271 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf309 4.42774475186 0 83.895833025 1.296250462500005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf310 2.46803429972 0 83.817500275 1.413749587500007 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf311 4.48904909382 0 83.6108329 1.7237506499999995 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf312 2.53594802291 0 85.054167025 0.10583297500000699 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf313 4.63000451649 0 84.4649997 0.6950002999999981 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf314 2.68114957789 0 84.820833775 0.33916622500000815 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf315 3.34760900076 0 85.23750045 0.22249955000000626 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf316 2.50557750147 0 84.5375006 0.6224994000000038 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf317 4.60761017185 0 83.7708333 1.4837500499999976 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf318 2.8605404372 0 84.4941671 0.6658329000000066 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf319 2.30748540935 0 85.6141659 -0.15416589999999813 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf320 4.85885544059 0 84.37666675 0.5749998750000103 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf321 2.02960847562 0 85.007501175 0.15249882500000356 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf322 2.95809675652 0 85.295832475 0.1641675250000077 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf323 2.52912274255 0 83.908333625 1.2774995625000116 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf324 3.46562856844 0 85.1783329 0.2816671000000042 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf325 4.35642257412 0 84.435832725 0.4862509125000116 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf326 4.42174155156 0 84.484166675 0.6758333250000078 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf327 3.42944293235 0 85.258334 0.20166600000000018 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf328 2.9200817227 0 85.425833725 0.03416627500000119 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf329 2.12312397991 0 85.304999325 0.15500067500000797 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf330 3.39002063361 0 84.330833225 0.6437501624999982 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf331 2.19091471805 0 84.1649996 0.8925006000000053 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf332 4.35642257412 0 84.2174994 0.8137509000000165 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf333 3.30640319553 0 83.7333332 1.5400002000000015 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf334 3.35104871505 0 85.510834475 -0.050834474999990664 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf335 2.7823322265 0 84.299167475 0.6912487875000011 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf336 1.79160865678 0 83.333336 2.1399960000000036 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf337 1.82049363128 0 83.633339 1.6899914999999979 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf338 2.86589315985 0 85.0425013 0.11749870000000728 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf339 4.85885544059 0 84.0366665 1.0850002500000144 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf340 2.73557376185 0 85.156665975 0.003334025000009455 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf341 2.8168240943 0 84.806666 0.35333399999999815 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf342 5.02441943023 0 83.959166925 1.2012496124999998 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf343 4.38883904114 0 84.42666685 0.4999997250000021 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf344 3.97476744493 0 84.278334475 0.7224982875000165 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf345 2.58458352145 0 85.45583365 0.004166350000002705 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf346 2.51626683335 0 85.00499975 0.15500025000000905 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf347 3.89600156508 0 83.8199999 1.410000150000009 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf348 4.67403368929 0 84.041666625 1.0775000625000004 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf349 3.65869733242 0 84.625833975 0.534166024999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf350 3.44499877858 0 84.2925001 0.7012498500000106 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf351 2.33141358263 0 85.58333305 -0.12333304999998801 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf352 4.38883904114 0 83.934167675 1.2387484875000112 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf353 3.04357800698 0 85.43666725 0.023332750000005842 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf354 4.42774475186 0 83.994167575 1.148748637499999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf355 4.06183643479 0 83.865833225 1.3412501625000033 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf356 2.49704621765 0 83.978333525 1.172499712500013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf357 2.29508685841 0 85.124999725 0.03500027500001013 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf358 3.88013266186 0 83.978333325 1.1725000125000165 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf359 3.30640319553 0 85.405833675 0.05416632500000845 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf360 3.19188638661 0 83.76416665 1.493750024999997 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf361 4.11336811599 0 84.49083295 0.6691670500000072 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf362 3.52359981675 0 84.929999375 0.23000062500001095 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf363 3.65869733242 0 84.155833425 0.9062498625000117 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf364 2.63326868545 0 83.545833975 1.8212490375000172 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf365 2.73835708776 0 84.59083395 0.5691660500000012 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf366 1.94407886078 0 84.159166525 0.9012502125000026 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf367 3.27701048398 0 84.339999725 0.63000041250001 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf368 2.8439887133 0 83.61499925 1.7175011250000125 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf369 2.98806898383 0 84.2358331 0.7862503500000173 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf370 5.20166453319 0 83.64583405 1.6712489250000004 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf371 2.38597460634 0 85.628333575 -0.16833357499999407 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf372 3.20576529027 0 84.0533323 1.0600015500000168 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf373 3.48571779877 0 85.29583275 0.1641672500000027 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf374 3.12725210162 0 83.810833375 1.4237499375000056 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf375 5.00735701676 0 83.70333305 1.5850004250000111 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf376 2.7586587916 0 84.016666475 1.115000287500017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf377 3.59623494226 0 84.02083355 1.1087496749999985 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf378 3.07218125113 0 84.863333575 0.2966664250000065 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf379 5.15619113861 0 84.0725 1.03125 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf380 3.30640319553 0 83.986665925 1.1600011125000123 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf381 2.93135224398 0 84.207499925 0.8287501125000176 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf382 4.85885544059 0 83.6624998 1.6462502999999984 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf383 3.48571779877 0 83.728333825 1.547499262499997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf384 3.39002063361 0 83.503332775 1.8850008375000016 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf385 4.42174155156 0 84.431667375 0.4924989375000024 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf386 4.67403368929 0 83.978332625 1.172501062500018 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf387 3.46562856844 0 85.096666525 0.06333347500000175 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf388 5.1195298246 0 83.626666625 1.7000000625000098 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf389 3.42944293235 0 83.7425005 1.5262492499999993 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf390 4.1480827235 0 83.877500325 1.323749512500008 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf391 2.94627555145 0 83.535000025 1.8374999625000044 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf392 3.48199621824 0 85.194167075 0.2658329250000094 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf393 5.29506075557 0 83.740000375 1.5299994375000168 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf394 3.39002063361 0 85.356666 0.10333400000000098 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf395 2.61686247873 0 83.76666665 1.4900000250000005 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf396 3.80013422222 0 83.9200003 1.2599995500000105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf397 3.84520265677 0 84.26749955 0.7387506750000128 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf398 3.85022285799 0 84.3808335 0.5687497500000163 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf399 1.88204484827 0 84.4200002 0.5099997000000016 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf400 3.20576529027 0 83.7333335 1.5399997500000069 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf401 4.8661051796 0 83.832500275 1.3912495875000062 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf402 2.55546827703 0 85.2574993 0.2025006999999988 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf403 2.60969359339 0 85.549166275 -0.08916627499999946 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf404 3.15056754597 0 84.719165625 0.4408343750000029 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf405 3.44136359344 0 84.57583365 0.5841663499999982 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf406 2.56229458097 0 83.766666575 1.4900001375000045 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf407 4.94025067672 0 84.08000015 1.0199997750000023 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf408 4.0017352815 0 84.265833375 0.7412499375000081 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf409 2.81662374994 0 85.6191671 -0.1591670999999934 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf410 2.84104486338 0 83.69583365 1.5962495250000117 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf411 3.84974163103 0 83.6866675 1.6099987500000097 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf412 3.32835645011 0 85.147500375 0.012499624999998127 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf413 3.20535754348 0 84.6524999 0.50750010000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf414 3.92713031704 0 84.51583365 0.6441663500000004 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf415 3.65459743556 0 84.525833325 0.6341666750000116 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf416 4.42174155156 0 84.50833295 0.6516670500000089 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf417 3.10452256642 0 84.8075001 0.3524999000000065 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf418 3.56620961403 0 85.090833625 0.06916637500000322 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf419 2.61205424073 0 84.2916662 0.7025007000000159 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 31 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf420 2.30954242221 0 83.91000015 1.2749997750000048 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf421 2.32388897879 0 85.4341668 0.025833200000005274 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf422 3.16448600486 0 85.372500475 0.08749952500000974 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf423 2.68114957789 0 83.6916668 1.6024998000000181 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf424 3.40166834257 0 83.9191672 1.2612492000000017 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf425 2.32953276127 0 85.559166375 -0.09916637499999864 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf426 2.93135224398 0 84.9558331 0.20416689999999849 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf427 2.98202604677 0 83.9716663 1.1825005500000145 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf428 4.29300531776 0 84.635832975 0.5241670250000056 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf429 4.46188167251 0 83.8666668 1.339999800000001 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf430 2.29313137734 0 83.5649998 1.7925003000000146 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf431 3.07180677592 0 85.439167075 0.02083292500000483 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf432 3.66367779072 0 83.75999915 1.5000012750000096 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf433 4.94025067672 0 84.2616668 0.747499800000007 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf434 4.85885544059 0 83.932499875 1.2412501875000004 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf435 3.28789331155 0 84.913333525 0.24666647500000638 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf436 5.11150591832 0 83.850832525 1.3637512125000057 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf437 3.48571779877 0 84.99583365 0.16416635000001067 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf438 2.42513277215 0 85.244167075 0.21583292500001222 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 33 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf439 3.17463312783 0 84.883333125 0.2766668750000122 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf440 3.74747795825 0 84.98666605 0.1733339500000085 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf441 2.90250153505 0 84.057500025 1.0537499625000137 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 35 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf442 3.04987324632 0 84.386666325 0.560000512500018 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf443 3.05943261456 0 83.763333325 1.4950000125000003 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 29 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf444 2.98202604677 0 84.424166475 0.5037502874999973 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf445 3.73279362334 0 84.47166705 0.6883329500000116 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf446 4.28054278574 0 84.365000175 0.592499737499999 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf447 4.749306487 0 84.19499995 0.8475000750000135 -1 gpu conv perf 23 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf448 2.41129462935 0 85.37333335 0.08666665000000934 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf449 4.62205314759 0 84.094166925 0.9987496125000135 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf450 3.58748542243 0 83.93999855 1.2300021750000099 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf451 3.35868172117 0 84.358334225 0.6024986625000182 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf452 3.2442204033 0 83.892500775 1.3012488375000046 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf453 3.47749717017 0 84.054167525 1.0587487125000123 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf454 4.6004224797 0 83.909999675 1.2750004875000158 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf455 2.74581611742 0 83.711667625 1.5724985625000016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf456 3.55728584143 0 82.854999725 2.857500412500009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 4 5 promise swing_level 7 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf457 4.40789063084 0 84.004166675 1.1337499875000177 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf458 3.23000726883 0 83.2958329 2.1962506500000174 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf459 3.90066133282 0 83.319168 2.1612480000000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf460 4.68074200425 0 83.734165525 1.5387517125000159 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf461 3.25488617683 0 84.3124998 0.6712503000000112 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf462 4.17689583184 0 84.9249996 0.23500039999999844 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf463 3.80013422222 0 84.0683336 1.0374996000000039 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf464 3.80013422222 0 83.8733328 1.3300008000000076 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf465 3.53595970915 0 83.400833725 2.0387494125000103 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf466 4.56481779115 0 83.8733324 1.3300014000000147 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf467 4.27374553867 0 83.748333725 1.5174994125000154 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf468 3.0122585054 0 83.455833075 1.9562503875000061 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf469 4.78711530883 0 83.65750105 1.6537484250000176 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf470 2.69064816356 0 83.47416785 1.9287482250000068 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf471 2.97872563985 0 84.6950001 0.4649999000000037 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf472 2.96979047613 0 83.0250004 2.6024994000000135 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf473 3.12725210162 0 83.171666475 2.382500287500015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf474 3.38956467165 0 85.40000015 0.05999985000000835 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf475 5.50155459048 0 83.673333075 1.6300003875000044 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf476 4.56481779115 0 83.7641668 1.4937498000000105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf477 4.49523657385 0 83.826667975 1.3999980374999978 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf478 4.17689583184 0 83.59416615 1.7487507749999978 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf479 4.77162407078 0 83.575000575 1.7774991375000013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf480 3.84974163103 0 83.948332575 1.2175011375000153 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf481 4.36224960626 0 83.728333275 1.547500087500012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf482 3.61386264477 0 83.7358343 1.5362485500000176 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf483 5.1195298246 0 83.2549999 2.2575001500000056 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf484 3.51061454803 0 84.35999905 0.6000014250000092 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf485 4.68074200425 0 84.0499998 1.0650003000000154 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf486 3.70465846079 0 84.597500425 0.5624995749999983 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf487 2.99080920887 0 83.2541668 2.258749800000018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf488 3.31339824504 0 84.869167175 0.29083282500000396 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf489 3.0527280537 0 84.244166775 0.7737498375000129 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf490 4.49523657385 0 83.6258349 1.7012476500000062 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf491 2.03940354341 0 83.530000975 1.8449985375000182 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf492 3.30234620526 0 83.769999675 1.4850004875000167 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf493 4.11856269266 0 82.9658334 2.6912499000000167 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf494 2.56430926229 0 83.56583275 1.79125087500001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf495 4.06183643479 0 83.384999475 2.0625007875000065 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf496 3.54436876665 0 83.98666645 1.1600003250000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 6 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf497 4.49523657385 0 84.877499925 0.28250007500001006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf498 3.07180677592 0 84.006665775 1.1300013375000049 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf499 4.41448885793 0 84.739167225 0.4208327750000024 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf500 4.8661051796 0 83.76833265 1.4875010250000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf501 3.03083319426 0 83.9025001 1.2862498500000115 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf502 4.39475317355 0 83.85416775 1.3587483750000047 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf503 3.13382775829 0 83.087500375 2.5087494375000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf504 4.8661051796 0 83.73499935 1.5375009750000146 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf505 4.40131209873 0 83.6399992 1.6800011999999995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf506 4.71064890814 0 83.84750095 1.3687485750000121 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf507 2.66150866558 0 83.75666565 1.5050015250000044 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf508 2.86589315985 0 83.689166825 1.6062497625000063 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf509 4.53672840544 0 83.38583315 2.0612502750000132 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf510 3.39354810299 0 83.37083375 2.0837493750000036 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf511 4.29866382416 0 83.6133328 1.7200008000000153 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf512 2.64960329927 0 83.56416675 1.7937498750000103 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf513 5.1195298246 0 83.70499985 1.5825002250000182 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf514 4.62205314759 0 84.531667025 0.6283329750000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf515 3.51061454803 0 84.77166675 0.38833325000001084 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf516 4.56481779115 0 83.193332675 2.3500009875000174 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 7 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf517 4.47477396827 0 83.7516658 1.512501300000011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf518 3.80013422222 0 83.5333328 1.8400008000000128 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf519 3.29121134217 0 83.777499575 1.473750637500018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf520 4.17098826512 0 84.67083305 0.4891669500000063 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 6 5 promise swing_level 6 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf521 4.42774475186 0 83.91999975 1.2600003750000042 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf522 4.98960168004 0 83.666666775 1.6399998375000138 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf523 5.40080120652 0 83.67083355 1.6337496750000113 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf524 4.23690511031 0 84.887500325 0.2724996750000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf525 2.93433101084 0 84.92166715 0.23833285000000048 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 5 promise swing_level 6 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf526 4.68074200425 0 83.80833305 1.4275004250000052 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf527 4.1533654053 0 83.0999996 2.490000600000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf528 3.52741090964 0 84.39583325 0.5462501250000145 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf529 3.29121134217 0 85.70333195 -0.24333195000000102 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf530 4.8661051796 0 83.547500225 1.818749662500018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf531 3.15056754597 0 84.0058332 1.1312502000000109 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf532 4.32387438839 0 82.97166645 2.6825003250000066 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf533 2.80584737113 0 83.8400008 1.3799988000000099 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf534 3.8397168061 0 83.173333575 2.3799996375000063 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf535 4.00665156404 0 84.0058329 1.1312506500000055 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf536 5.60613859814 0 83.570832975 1.783750537500005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf537 2.49282474144 0 83.272499025 2.231251462500005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf538 3.99579404276 0 83.340832675 2.1287509875000055 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf539 3.3549420805 0 83.699166625 1.5912500625000021 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf540 4.60761017185 0 83.509999925 1.875000112500004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf541 2.39753056999 0 83.39333175 2.0500023750000054 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf542 2.99080920887 0 83.24750015 2.2687497750000176 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf543 3.51061454803 0 84.308333575 0.6774996374999986 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf544 4.27374553867 0 83.973333025 1.1800004625000042 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf545 4.33022330151 0 83.722499325 1.556251012500006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf546 4.56481779115 0 83.53500085 1.8374987250000032 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf547 4.09568311293 0 84.77583305 0.38416695000000234 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf548 2.92269861556 0 83.676667275 1.624999087500008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf549 3.90066133282 0 84.114167575 0.9687486375000134 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf550 2.30748540935 0 85.19333455 0.26666545000000214 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf551 3.81982297414 0 83.2883327 2.2075009500000107 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf552 2.48647461628 0 84.136666175 0.9350007375000047 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf553 4.06183643479 0 84.096666475 0.9950002874999981 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf554 3.29121134217 0 83.677498825 1.6237517625000066 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf555 4.62205314759 0 83.57666685 1.774999725000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf556 4.42774475186 0 83.749999975 1.515000037500016 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf557 3.46931522498 0 84.18249955 0.8662506750000034 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf558 2.16999135568 0 84.5841665 0.5758335000000102 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 5 promise swing_level 7 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf559 3.80013422222 0 83.679166625 1.6212500625000175 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf560 4.48157410599 0 83.7274995 1.5487507500000177 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf561 4.23690511031 0 84.0941674 0.9987489000000025 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf562 5.40080120652 0 83.426666225 2.0000006624999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf563 4.36224960626 0 83.5524996 1.811250600000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf564 5.19163058913 0 83.597499825 1.7437502625000079 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf565 3.57877837384 0 84.496667225 0.6633327750000092 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf566 3.93186488986 0 83.839999775 1.3800003375000145 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf567 4.41448885793 0 83.87999955 1.320000675000017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf568 3.24057963994 0 84.304166675 0.6837499875000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf569 3.61386264477 0 84.019999425 1.1100008625000157 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf570 5.1195298246 0 83.2475002 2.2687497000000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf571 4.8661051796 0 83.777500025 1.4737499625000154 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf572 3.39354810299 0 82.96583305 2.691250425000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf573 5.45071234571 0 83.635833125 1.6862503125000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf574 3.47749717017 0 84.45083235 0.46375147500000935 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf575 4.36224960626 0 84.5974989 0.5625011000000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf576 3.65869733242 0 85.481666225 -0.021666224999992767 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf577 5.20997422309 0 83.5750004 1.7774994000000177 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf578 3.05595874161 0 83.3591666 2.1012501000000157 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf579 3.80013422222 0 84.399166125 0.5412508125000173 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf580 3.74226721787 0 84.9525001 0.2074999000000105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf581 3.24771715301 0 84.51416745 0.645832550000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf582 4.00665156404 0 83.50249935 1.8862509750000171 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf583 2.80051833724 0 84.685833875 0.474166125000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf584 4.56481779115 0 82.9549998 2.7075003000000137 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf585 3.24057963994 0 84.2116667 0.8224999500000152 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf586 4.21269566507 0 83.9266659 1.2500011500000028 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf587 3.39354810299 0 83.1466669 2.4199996500000083 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf588 5.20997422309 0 83.466666825 1.9399997625000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf589 3.73279362334 0 83.930000175 1.2449997375000024 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf590 4.18876131089 0 84.155000125 0.9074998125000064 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf591 3.88524454546 0 84.22166635 0.8075004749999977 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf592 3.57011348802 0 84.1708333 0.8837500500000104 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf593 3.84974163103 0 83.740833875 1.5287491874999972 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf594 3.52741090964 0 85.19999995 0.2600000499999993 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf595 2.0006931511 0 83.368333425 2.087499862500003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf596 2.39952284113 0 83.755833075 1.5062503875000104 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf597 5.03217204616 0 83.551667425 1.8124988625 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf598 2.3293565891 0 85.089168 0.07083200000000434 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf599 3.23709823473 0 83.13499875 2.4375018750000166 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf600 3.09112547159 0 84.906667725 0.25333227499999966 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf601 4.44108049557 0 83.01749995 2.613750075000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf602 4.71064890814 0 83.3824997 2.0662504500000125 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf603 4.8661051796 0 83.372500575 2.081249137500002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf604 3.19840480446 0 84.269166725 0.7362499124999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf605 4.06183643479 0 83.374165925 2.078751112500008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf606 3.46931522498 0 83.883333175 1.3150002375000014 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf607 3.65869733242 0 84.564166025 0.5958339749999993 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf608 3.84974163103 0 84.50499975 0.655000250000009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf609 3.0060522373 0 83.2666666 2.2400001000000174 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf610 4.29866382416 0 84.190832875 0.8537506875000105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf611 2.52912274255 0 84.520834775 0.6391652250000078 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf612 3.65869733242 0 85.08833295 0.07166705000001061 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf613 2.93426889145 0 83.395833075 2.0462503875000095 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf614 3.72807479022 0 84.123332375 0.9550014375000018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf615 5.03217204616 0 83.560833775 1.7987493374999985 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf616 4.11281886651 0 84.95750045 0.2024995500000074 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 6 5 promise swing_level 6 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf617 3.0750779935 0 85.0858345 0.07416550000000088 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 5 promise swing_level 7 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf618 4.17689583184 0 83.297500675 2.1937489875000153 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf619 2.69556689425 0 84.925000575 0.2349994250000066 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf620 3.02145143763 0 83.3250005 2.1524992500000053 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf621 3.535869506 0 85.07499905 0.08500095000000274 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 6 5 promise swing_level 7 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf622 3.2442204033 0 84.0949995 0.9975007500000075 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf623 3.0184247196 0 83.844166 1.3737510000000057 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf624 4.28616846517 0 84.8858338 0.2741662000000048 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf625 4.02854458864 0 83.6858329 1.6112506500000165 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf626 2.63299356219 0 83.69916615 1.591250775000013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf627 3.80013422222 0 83.6391668 1.6812498000000105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf628 3.61386264477 0 84.3174999 0.6637501500000056 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf629 4.78711530883 0 83.6666668 1.6399998000000053 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf630 2.93433101084 0 85.0549992 0.10500080000000767 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 5 promise swing_level 6 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf631 4.24911447842 0 83.8516655 1.362501750000014 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf632 5.1195298246 0 83.624166275 1.7037505875000178 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf633 4.00665156404 0 83.2800001 2.219999850000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf634 4.8661051796 0 83.880833225 1.3187501625000024 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf635 3.32835645011 0 83.3925005 2.051249250000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf636 4.29866382416 0 84.145833375 0.9212499375000149 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf637 2.65671333449 0 83.742498925 1.5262516124999976 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf638 3.0750779935 0 84.079998875 1.020001687500006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf639 5.20997422309 0 83.537500425 1.8337493625000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf640 3.17089287752 0 83.099998875 2.490001687500012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf641 4.63658695371 0 83.5449995 1.8225007500000032 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf642 3.3549420805 0 83.34 2.1300000000000026 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf643 3.75178902153 0 84.438333425 0.4824998625000134 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf644 3.35104871505 0 84.717499525 0.44250047500001133 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf645 2.34416917693 0 84.8849997 0.27500030000001063 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf646 2.5118891277 0 82.94666695 2.719999575000017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf647 4.94774553187 0 83.433333375 1.9899999375000021 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf648 4.42774475186 0 83.6849998 1.6125003000000078 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf649 3.43305296381 0 83.0583327 2.5525009500000166 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf650 4.6004224797 0 83.8491669 1.3662496500000074 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf651 3.31720456502 0 83.159166925 2.401249612500017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf652 3.95294609782 0 84.73250125 0.4274987500000066 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf653 5.45071234571 0 83.452499775 1.9612503374999974 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf654 4.71064890814 0 83.414999175 2.0175012374999994 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf655 4.29866382416 0 83.088333125 2.5075003124999995 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf656 2.83285757459 0 84.964999975 0.19500002500000735 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 7 5 promise swing_level 6 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf657 2.93135224398 0 83.37833325 2.072500125000012 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf658 4.27374553867 0 83.634166725 1.6887499125000076 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf659 3.73279362334 0 83.000833325 2.6387500125000045 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 5 promise swing_level 7 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf660 3.36634957954 0 83.085833475 2.511249787500006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf661 2.7586587916 0 83.30083345 2.1887498250000093 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf662 4.64388818634 0 83.463332725 1.9450009125000065 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf663 3.36634957954 0 84.3250002 0.6524996999999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf664 4.6659305061 0 83.376666675 2.0749999875000142 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 6 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf665 4.23690511031 0 84.83333285 0.32666715000000013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 5 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf666 2.93135224398 0 84.160833275 0.8987500875000052 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf667 2.95195426078 0 84.90749935 0.2525006500000103 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 5 5 promise swing_level 7 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf668 4.53672840544 0 83.541666425 1.827500362500004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf669 2.34416917693 0 84.857499925 0.3025000750000061 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf670 3.48984270518 0 84.41999995 0.5100000750000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf671 3.0527280537 0 83.764167575 1.4937486375000049 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf672 3.25488617683 0 83.087500625 2.5087490625000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf673 4.71064890814 0 83.144999575 2.4225006375000078 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf674 3.26931959511 0 85.326667 0.13333300000000464 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf675 5.20997422309 0 83.419166925 2.011249612500009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf676 3.43696886837 0 84.8083328 0.35166720000000284 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf677 2.68574735107 0 83.555000175 1.8074997375000024 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf678 4.33659088676 0 84.000833925 1.1387491125000153 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf679 3.46931522498 0 84.7716667 0.3883333000000079 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf680 4.45449681271 0 83.47833315 1.9225002750000115 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf681 5.1195298246 0 83.4533334 1.9599998999999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf682 4.11856269266 0 84.7766674 0.38333260000001135 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf683 3.59623494226 0 83.344999875 2.122500187500009 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf684 2.42917048367 0 85.255833125 0.2041668750000099 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf685 3.93186488986 0 84.61999955 0.5400004500000023 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 6 5 promise swing_level 7 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf686 3.84974163103 0 83.789999375 1.4550009375000172 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf687 3.97425458366 0 83.988333325 1.1575000125000088 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf688 3.8397168061 0 84.514167225 0.6458327750000109 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 7 5 promise swing_level 6 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf689 4.09568311293 0 83.275000525 2.227499212500014 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf690 3.56149045943 0 84.91583295 0.24416705000001004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf691 4.42774475186 0 83.76000005 1.4999999250000045 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf692 4.22476570574 0 83.904999775 1.282500337500018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf693 4.11856269266 0 83.91000025 1.2749996250000137 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf694 5.1195298246 0 83.49333325 1.9000001249999983 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf695 2.67115144897 0 83.42999995 1.9950000750000143 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf696 4.23690511031 0 84.7383333 0.4216667000000115 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 4 5 promise swing_level 7 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf697 4.40131209873 0 83.79416645 1.448750324999999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf698 4.99805803481 0 83.810000025 1.4249999625000171 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf699 3.2442204033 0 83.42666685 1.9999997250000021 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf700 4.57189457086 0 83.818333625 1.4124995625000167 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf701 4.60761017185 0 83.0674999 2.5387501500000056 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 5 promise swing_level 7 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf702 3.07821945478 0 84.765834275 0.394165725000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf703 4.36224960626 0 84.02166645 1.1075003250000108 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf704 2.76636700953 0 84.60416665 0.5558333500000089 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf705 2.34790180657 0 85.303332675 0.15666732500001218 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf706 3.15056754597 0 84.009166575 1.1262501375000156 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf707 4.49523657385 0 84.54749905 0.6125009500000062 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf708 4.03958106713 0 85.1300001 0.02999990000000141 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 5 promise swing_level 7 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf709 5.07547506828 0 83.512499625 1.871250562500002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf710 4.63658695371 0 83.60999985 1.7250002250000165 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf711 3.90066133282 0 83.80166665 1.4375000250000056 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf712 3.40522010623 0 84.083332975 1.0150005375000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf713 4.55072975326 0 83.676665425 1.6250018625000138 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf714 2.49700123309 0 84.32166715 0.6574992750000135 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf715 3.55728584143 0 83.1108332 2.473750200000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf716 4.55072975326 0 83.798333375 1.4424999375000098 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf717 4.21269566507 0 83.65583315 1.6562502749999979 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf718 3.04334411031 0 84.176667425 0.8749988625 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf719 2.61896392029 0 83.72333265 1.5550010250000028 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf720 5.60613859814 0 83.339999575 2.130000637500018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf721 4.91476285223 0 83.251666825 2.2624997625000063 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf722 4.95606048827 0 83.671666925 1.6324996125000126 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf723 3.0060522373 0 83.262499775 2.2462503375000153 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf724 3.94237731198 0 84.794999925 0.3650000750000061 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 7 5 promise swing_level 6 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf725 3.24779325302 0 83.64083305 1.6787504250000111 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 6 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf726 3.03083319426 0 84.04750105 1.0687484250000168 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf727 4.28616846517 0 84.5724998 0.5875002000000024 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf728 2.91685555044 0 83.389167125 2.0562493125000074 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf729 2.88270158233 0 84.89833305 0.26166695000000006 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 5 promise swing_level 7 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf730 3.25488617683 0 82.98416655 2.663750175000011 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf731 4.23690511031 0 83.139999175 2.430001237500008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf732 5.45071234571 0 83.36999975 2.085000375 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf733 3.45315232558 0 82.9650004 2.692499400000017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf734 2.48024552199 0 83.764166825 1.493749762500002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf735 3.51488775582 0 82.97500075 2.6774988749999977 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf736 5.1195298246 0 83.901665725 1.2875014125000064 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf737 3.16076961237 0 83.37416625 2.078750625000005 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf738 3.46931522498 0 83.415000625 2.0174990625000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf739 5.03217204616 0 83.3558337 2.10624945 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf740 5.20997422309 0 83.76499975 1.4925003750000059 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf741 3.29121134217 0 84.9216665 0.23833350000000453 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf742 3.76626526312 0 82.96916675 2.6862498750000086 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 3 4 promise swing_level 3 5 promise swing_level 7 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf743 3.39737435359 0 84.125833175 0.9512502375000125 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 7 5 promise swing_level 6 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf744 3.42897630477 0 83.886666875 1.309999687500003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 5 4 promise swing_level 6 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf745 3.84974163103 0 84.352499825 0.6112502625000147 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf746 3.36634957954 0 84.6774997 0.48250030000000665 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 4 5 promise swing_level 7 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf747 4.71064890814 0 83.726666975 1.5499995375000069 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf748 3.0527280537 0 83.5691671 1.7862493500000056 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf749 4.29866382416 0 83.41666675 2.014999875000001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 5 4 promise swing_level 7 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf750 3.8699491435 0 84.844166325 0.3158336750000018 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 6 5 promise swing_level 6 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf751 4.68074200425 0 83.453333275 1.9600000874999992 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 6 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf752 3.84974163103 0 84.13833275 0.9325008750000023 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf753 4.23690511031 0 84.0374998 1.0837502999999984 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf754 3.73279362334 0 83.38416615 2.0637507750000097 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf755 3.3549420805 0 83.305833625 2.1812495624999997 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 28 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf756 2.93135224398 0 83.381666725 2.067499912500004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf757 4.23690511031 0 83.1691663 2.386250550000007 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf758 5.1643560615 0 83.50833265 1.877501025000008 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf759 3.84974163103 0 83.7841657 1.4637514500000037 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf760 4.21269566507 0 83.945000075 1.222499887500014 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf761 3.75178902153 0 84.19999985 0.8400002250000114 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf762 3.93186488986 0 83.3824991 2.0662513500000017 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf763 3.78064739543 0 83.970832525 1.183751212499999 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf764 3.90066133282 0 83.275000075 2.2274998875000165 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf765 4.36224960626 0 82.970831925 2.6837521125000094 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf766 4.40131209873 0 84.19666645 0.8450003250000151 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf767 3.77580687597 0 84.63999925 0.5200007500000027 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf768 3.40522010623 0 84.372499775 0.5812503375000162 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf769 3.37413466519 0 83.10833335 2.477499975000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf770 4.36224960626 0 83.624166575 1.703750137500002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf771 2.69064816356 0 83.3299999 2.1450001500000013 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf772 3.17089287752 0 83.14083245 2.4287513250000003 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 4 promise swing_level 6 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf773 2.76636700953 0 83.49833355 1.8924996750000105 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf774 3.90066133282 0 84.675833325 0.4841666750000059 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 7 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf775 3.46931522498 0 82.96750055 2.6887491750000123 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf776 3.44499877858 0 84.069166625 1.0362500625000166 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 7 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf777 3.72807479022 0 84.8000002 0.35999980000000564 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf778 3.84974163103 0 83.89249975 1.3012503750000093 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf779 3.39354810299 0 83.616666525 1.7150002125000086 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 -6 gpu conv perf 23 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 +6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf780 4.78711530883 0 83.7558332 1.5062502000000109 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 3 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf781 3.21935410129 0 85.51583235 -0.05583234999999148 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf782 4.36224960626 0 84.7341658 0.4258342000000056 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 4 5 promise swing_level 7 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf783 4.09568311293 0 84.338332525 0.63250121250001 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 6 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf784 3.96890595417 0 84.714166325 0.4458336750000115 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 7 5 promise swing_level 6 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf785 3.90066133282 0 84.400833425 0.5387498625000049 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 7 5 promise swing_level 7 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf786 2.87421319527 0 85.265833675 0.19416632500000902 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf787 4.33659088676 0 83.624999175 1.7025012375000088 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf788 4.78711530883 0 83.92583345 1.2512498250000093 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf789 2.39753056999 0 84.877501875 0.28249812500001215 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf790 4.55072975326 0 84.004165775 1.1337513375000015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf791 3.08465896376 0 85.410834325 0.04916567500000896 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 7 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf792 4.53672840544 0 84.04999965 1.065000525000002 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv samp 35 add fp32 1 tanh fp32 1 +5 gpu conv samp 35 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf793 3.72807479022 0 84.30833215 0.6775017750000103 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 4 4 promise swing_level 4 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf794 5.07547506828 0 83.721666125 1.5575008125000096 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 4 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf795 5.20997422309 0 83.57750015 1.7737497749999989 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 3 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf796 4.00665156404 0 83.979166625 1.1712500625000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 4 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf797 3.9635717019 0 83.843332025 1.375001962500015 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf798 2.81925910919 0 83.48583365 1.9112495250000023 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 promise swing_level 3 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 -6 gpu conv samp 32 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 +6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf799 2.0622213797 0 84.307499875 0.6787501875000004 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv samp 36 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 tanh fp16 1 +4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv samp 36 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf800 4.1533654053 0 83.934999825 1.2375002624999993 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 3 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 tanh fp32 1 +5 gpu conv fp16 1 add fp16 1 tanh fp16 1 6 promise swing_level 6 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- +++++ conf801 4.06183643479 0 83.36000025 2.0999996250000095 -1 gpu conv fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu conv samp 32 add fp32 1 tanh fp32 1 +3 gpu conv samp 32 add fp16 1 tanh fp16 1 4 promise swing_level 3 -5 gpu conv samp 33 add fp32 1 tanh fp32 1 +5 gpu conv samp 33 add fp16 1 tanh fp16 1 6 promise swing_level 5 -7 gpu mul fp16 1 add fp32 1 -8 gpu softmax fp32 1 +7 gpu mul fp16 1 add fp16 1 +8 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_confs_batch220.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_confs_batch220.txt index 744b287967..0239f3aaf7 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_confs_batch220.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_confs_batch220.txt @@ -1,3288 +1,3288 @@ +++++ conf1 1 0 99.69 0 -1 gpu conv fp32 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 3 gpu mul fp32 1 add fp32 1 tanh fp32 1 4 gpu mul fp32 1 add fp32 1 tanh fp32 1 5 gpu softmax fp32 1 ----- +++++ conf1 1.78817265464 0 99.220001 0.7049985000000021 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf2 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf3 1.73515484904 0 99.559998 0.5300020000000046 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf4 1.72017310656 0 99.540001 0.549998999999994 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf5 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf6 2.00016617632 0 99.68 0.4099999999999909 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf7 1.72333900478 0 99.620003 0.4699970000000008 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf8 1.78817265464 0 99.099998 0.8850029999999975 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf9 1.73515484904 0 99.580002 0.5099980000000045 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf10 1.78817265464 0 99.32 0.5550000000000068 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf11 1.73515484904 0 99.699997 0.39000300000000154 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf12 1.77226558474 0 99.540001 0.549998999999994 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf13 1.72333900478 0 99.599998 0.4900019999999984 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf14 1.72333900478 0 99.620003 0.4699970000000008 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf15 1.7756263212 0 99.099998 0.8850029999999975 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf16 2.00016617632 0 99.660004 0.42999599999999705 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf17 1.7756263212 0 99.260002 0.6449969999999965 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf18 1.51382277464 0 99.639999 0.45000099999999466 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf19 1.5 0 99.699997 0.39000300000000154 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf20 1.97610564729 0 99.599998 0.4900019999999984 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf21 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf22 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf23 1.78817265464 0 99.239998 0.6750029999999967 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf24 1.72333900478 0 99.519997 0.5700029999999942 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf25 2.00016617632 0 99.239998 0.6750029999999967 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf26 1.77226558474 0 99.339996 0.5250059999999976 -1 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf27 1.72017310656 0 99.300003 0.5849954999999909 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf28 2.00016617632 0 99.199997 0.7350045000000023 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf29 1.73515484904 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf30 1.99590274244 0 99.099998 0.8850029999999975 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf31 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf32 1.99590274244 0 99.540001 0.549998999999994 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf33 1.7756263212 0 99.279999 0.6150014999999911 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf34 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf35 1.72017310656 0 99.620003 0.4699970000000008 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf36 1.72333900478 0 99.639999 0.45000099999999466 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf37 1.73515484904 0 99.559998 0.5300020000000046 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf38 1.78817265464 0 99.239998 0.6750029999999967 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf39 1.77226558474 0 99.059998 0.9450030000000069 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf40 1.99590274244 0 99.580002 0.5099980000000045 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf41 1.72333900478 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf42 1.7756263212 0 99.239998 0.6750029999999967 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf43 1.7756263212 0 99.239998 0.6750029999999967 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf44 1.73515484904 0 99.599998 0.4900019999999984 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf45 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf46 1.72333900478 0 99.620003 0.4699970000000008 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf47 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf48 1.75663902891 0 99.540001 0.549998999999994 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf49 2.00016617632 0 99.379997 0.46500449999999205 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf50 1.99590274244 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf51 1.7756263212 0 99.32 0.5550000000000068 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf52 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf53 1.51382277464 0 99.620003 0.4699970000000008 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf54 1.72017310656 0 99.440002 0.6499979999999909 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf55 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf56 1.72333900478 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf57 1.7756263212 0 99.040001 0.974998499999991 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf58 1.78817265464 0 99.300003 0.5849954999999909 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf59 1.77226558474 0 99.459999 0.6300010000000015 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf60 1.77226558474 0 99.18 0.7649999999999864 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf61 1.99590274244 0 99.440002 0.6499979999999909 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf62 1.99590274244 0 99.260002 0.6449969999999965 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf63 1.78817265464 0 99.32 0.5550000000000068 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf64 1.72333900478 0 99.660004 0.42999599999999705 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf65 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf66 1.73515484904 0 99.519997 0.5700029999999942 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf67 1.73515484904 0 99.5 0.5899999999999977 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf68 1.78817265464 0 99.279999 0.6150014999999911 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf69 1.7756263212 0 99.32 0.5550000000000068 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf70 1.73515484904 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf71 1.75663902891 0 99.160004 0.7949939999999955 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf72 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf73 1.72333900478 0 99.480003 0.6099970000000013 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf74 1.7756263212 0 99.300003 0.5849954999999909 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf75 1.77226558474 0 99.300003 0.5849954999999909 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf76 1.72333900478 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf77 1.78817265464 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf78 1.75663902891 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf79 1.78817265464 0 99.519997 0.5700029999999942 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf80 2.00016617632 0 99.519997 0.5700029999999942 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf81 1.70544786131 0 99.639999 0.45000099999999466 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf82 1.73515484904 0 99.620003 0.4699970000000008 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf83 1.77226558474 0 99.220001 0.7049985000000021 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf84 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf85 1.51137932951 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf86 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf87 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf88 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf89 1.75663902891 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf90 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf91 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf92 1.78817265464 0 99.239998 0.6750029999999967 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf93 1.72017310656 0 99.559998 0.5300020000000046 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf94 1.72333900478 0 99.639999 0.45000099999999466 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf95 1.97610564729 0 99.080002 0.9149970000000067 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf96 1.7756263212 0 99.32 0.5550000000000068 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf97 1.73515484904 0 99.639999 0.45000099999999466 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf98 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf99 1.522932631 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf100 1.51382277464 0 99.599998 0.4900019999999984 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf101 2.00016617632 0 99.620003 0.4699970000000008 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf102 1.522932631 0 99.639999 0.45000099999999466 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf103 1.7756263212 0 99.360001 0.49499850000000123 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf104 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf105 1.72333900478 0 99.599998 0.4900019999999984 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf106 1.77226558474 0 99.199997 0.7350045000000023 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf107 1.75663902891 0 99.199997 0.7350045000000023 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf108 1.99590274244 0 99.599998 0.4900019999999984 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf109 1.78817265464 0 99.199997 0.7350045000000023 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf110 1.522932631 0 99.68 0.4099999999999909 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf111 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf112 1.72333900478 0 99.559998 0.5300020000000046 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf113 1.7756263212 0 99.300003 0.5849954999999909 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf114 2.00016617632 0 99.339996 0.5250059999999976 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf115 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf116 1.72333900478 0 99.540001 0.549998999999994 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf117 1.70544786131 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf118 1.78817265464 0 99.379997 0.46500449999999205 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf119 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf120 1.7756263212 0 99.160004 0.7949939999999955 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf121 1.73515484904 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf122 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf123 1.99590274244 0 99.459999 0.6300010000000015 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf124 1.70544786131 0 99.620003 0.4699970000000008 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf125 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf126 1.7756263212 0 99.379997 0.46500449999999205 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf127 1.51137932951 0 99.660004 0.42999599999999705 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf128 1.73515484904 0 99.639999 0.45000099999999466 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf129 1.5 0 99.699997 0.39000300000000154 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf130 1.78817265464 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf131 1.72017310656 0 99.300003 0.5849954999999909 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf132 1.73515484904 0 99.519997 0.5700029999999942 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf133 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf134 1.78817265464 0 99.279999 0.6150014999999911 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf135 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf136 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf137 1.77226558474 0 99.120003 0.8549955000000011 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf138 1.73515484904 0 99.599998 0.4900019999999984 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf139 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf140 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf141 1.73515484904 0 99.459999 0.6300010000000015 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf142 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf143 1.78817265464 0 99.239998 0.6750029999999967 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf144 1.7756263212 0 99.459999 0.6300010000000015 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf145 1.78817265464 0 99.239998 0.6750029999999967 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf146 1.72333900478 0 99.599998 0.4900019999999984 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf147 1.72333900478 0 99.620003 0.4699970000000008 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf148 2.00016617632 0 98.400002 1.9349969999999956 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf149 1.72017310656 0 99.559998 0.5300020000000046 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf150 2.01610051566 0 98.540001 1.724998499999991 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf151 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf152 2.00016617632 0 99.660004 0.42999599999999705 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf153 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf154 1.70544786131 0 99.620003 0.4699970000000008 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf155 1.97610564729 0 99.599998 0.4900019999999984 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf156 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf157 1.75663902891 0 99.199997 0.7350045000000023 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf158 1.78817265464 0 99.279999 0.6150014999999911 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf159 1.73515484904 0 99.699997 0.39000300000000154 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf160 1.78817265464 0 99.32 0.5550000000000068 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf161 1.99590274244 0 99.099998 0.8850029999999975 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf162 1.78817265464 0 99.059998 0.9450030000000069 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf163 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf164 1.75663902891 0 99.199997 0.7350045000000023 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf165 1.72333900478 0 99.639999 0.45000099999999466 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf166 1.72017310656 0 99.620003 0.4699970000000008 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf167 1.73515484904 0 99.639999 0.45000099999999466 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf168 1.7756263212 0 99.279999 0.6150014999999911 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf169 1.97610564729 0 99.080002 0.9149970000000067 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf170 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf171 1.70544786131 0 99.620003 0.4699970000000008 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf172 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf173 1.7756263212 0 99.160004 0.7949939999999955 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf174 1.75663902891 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf175 2.01610051566 0 98.839996 1.2750059999999976 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf176 1.99590274244 0 98.940002 1.1249969999999863 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf177 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf178 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf179 2.00016617632 0 99.239998 0.6750029999999967 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf180 1.78817265464 0 98.980003 1.064995500000002 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf181 1.7756263212 0 99.639999 0.45000099999999466 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf182 1.78817265464 0 99.379997 0.46500449999999205 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf183 1.70544786131 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf184 1.72333900478 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf185 1.75663902891 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf186 1.73515484904 0 99.599998 0.4900019999999984 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf187 1.78817265464 0 99.32 0.5550000000000068 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf188 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf189 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf190 1.78817265464 0 99.199997 0.7350045000000023 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf191 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf192 1.99590274244 0 99.440002 0.6499979999999909 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf193 1.77226558474 0 99.059998 0.9450030000000069 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf194 1.78817265464 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf195 2.00016617632 0 99.339996 0.5250059999999976 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf196 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf197 1.77226558474 0 98.940002 1.1249969999999863 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf198 1.78817265464 0 99.080002 0.9149970000000067 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf199 1.70544786131 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf200 1.7756263212 0 99.360001 0.49499850000000123 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf201 1.7756263212 0 99.199997 0.7350045000000023 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf202 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf203 1.77226558474 0 99.199997 0.7350045000000023 -1 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf204 1.51382277464 0 99.639999 0.45000099999999466 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf205 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf206 1.7756263212 0 99.099998 0.8850029999999975 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf207 1.99590274244 0 99.260002 0.6449969999999965 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf208 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf209 1.75663902891 0 99.160004 0.7949939999999955 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf210 1.97610564729 0 98.440002 1.8749969999999863 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf211 1.72017310656 0 99.300003 0.5849954999999909 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf212 1.77226558474 0 98.940002 1.1249969999999863 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf213 2.01610051566 0 98.440002 1.8749969999999863 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf214 1.75663902891 0 99.32 0.5550000000000068 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf215 1.78817265464 0 99.620003 0.4699970000000008 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf216 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf217 1.73515484904 0 99.519997 0.5700029999999942 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf218 1.51137932951 0 99.440002 0.6499979999999909 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf219 1.97610564729 0 98.480003 1.814995500000002 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf220 1.73515484904 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf221 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf222 1.77226558474 0 99.5 0.5899999999999977 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf223 1.522932631 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf224 1.522932631 0 99.540001 0.549998999999994 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf225 1.522932631 0 99.68 0.4099999999999909 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf226 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf227 1.72333900478 0 99.660004 0.42999599999999705 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf228 1.77226558474 0 98.980003 1.064995500000002 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf229 1.78817265464 0 99.220001 0.7049985000000021 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf230 1.7756263212 0 99.239998 0.6750029999999967 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf231 1.70544786131 0 99.639999 0.45000099999999466 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf232 1.73515484904 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf233 1.99590274244 0 99.540001 0.549998999999994 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf234 2.00016617632 0 99.199997 0.7350045000000023 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf235 1.78817265464 0 99.559998 0.5300020000000046 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf236 1.5 0 99.699997 0.39000300000000154 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf237 1.73515484904 0 99.639999 0.45000099999999466 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf238 1.97610564729 0 98.440002 1.8749969999999863 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf239 1.77226558474 0 98.800003 1.334995499999991 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf240 1.72333900478 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf241 1.75663902891 0 99.160004 0.7949939999999955 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf242 1.78817265464 0 99.239998 0.6750029999999967 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf243 1.75663902891 0 99.699997 0.39000300000000154 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf244 1.72017310656 0 99.419998 0.670001999999991 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf245 2.00016617632 0 99.0 1.0349999999999966 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf246 1.99590274244 0 98.519997 1.7550044999999912 -1 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf247 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf248 1.72333900478 0 99.559998 0.5300020000000046 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf249 1.72333900478 0 99.639999 0.45000099999999466 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf250 1.51137932951 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf251 2.01610051566 0 97.760002 2.8949969999999965 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf252 1.7756263212 0 99.160004 0.7949939999999955 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf253 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf254 1.72017310656 0 97.860001 2.7449985000000012 -1 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf255 1.72333900478 0 99.68 0.4099999999999909 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf256 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf257 1.72333900478 0 99.660004 0.42999599999999705 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf258 2.00016617632 0 99.620003 0.4699970000000008 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf259 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf260 1.99590274244 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf261 1.97610564729 0 99.599998 0.4900019999999984 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf262 2.00016617632 0 97.980003 2.564995500000002 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf263 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf264 1.99590274244 0 98.099998 2.3850029999999975 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf265 2.00016617632 0 98.080002 2.4149970000000067 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf266 1.70544786131 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf267 1.7756263212 0 99.32 0.5550000000000068 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf268 1.97610564729 0 98.480003 1.814995500000002 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf269 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf270 1.99590274244 0 98.139999 2.325001499999992 -1 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf271 1.73515484904 0 99.699997 0.39000300000000154 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf272 1.7756263212 0 99.260002 0.6449969999999965 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf273 1.51382277464 0 99.639999 0.45000099999999466 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf274 1.72017310656 0 99.559998 0.5300020000000046 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf275 1.75663902891 0 98.0 2.5349999999999966 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf276 1.73515484904 0 99.5 0.5899999999999977 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf277 1.78817265464 0 98.959999 1.0950015000000022 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf278 1.7756263212 0 99.639999 0.45000099999999466 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf279 1.99590274244 0 98.519997 1.7550044999999912 -1 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf280 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf281 1.78817265464 0 97.699997 2.9850045000000023 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf282 1.72333900478 0 99.599998 0.4900019999999984 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf283 1.72333900478 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf284 1.78817265464 0 99.300003 0.5849954999999909 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf285 1.73515484904 0 99.519997 0.5700029999999942 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf286 1.78817265464 0 99.660004 0.42999599999999705 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf287 1.77226558474 0 98.599998 1.6350029999999975 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf288 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf289 1.77226558474 0 99.059998 0.9450030000000069 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf290 1.522932631 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf291 1.72333900478 0 99.540001 0.549998999999994 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf292 1.7756263212 0 99.32 0.5550000000000068 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf293 2.01610051566 0 98.480003 1.814995500000002 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf294 1.7756263212 0 99.040001 0.974998499999991 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf295 1.97610564729 0 98.440002 1.8749969999999863 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf296 1.99590274244 0 98.599998 1.6350029999999975 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf297 2.00016617632 0 99.68 0.4099999999999909 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf298 1.73515484904 0 99.559998 0.5300020000000046 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf299 1.73515484904 0 99.580002 0.5099980000000045 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf300 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf301 1.99590274244 0 99.540001 0.549998999999994 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf302 1.7756263212 0 97.760002 2.8949969999999965 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf303 1.75663902891 0 99.32 0.5550000000000068 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf304 1.77226558474 0 99.5 0.5899999999999977 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf305 2.01610051566 0 98.540001 1.724998499999991 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf306 1.78817265464 0 98.82 1.3050000000000068 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf307 1.75663902891 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf308 2.01610051566 0 97.82 2.805000000000007 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf309 1.97610564729 0 99.080002 0.9149970000000067 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf310 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf311 1.77226558474 0 99.279999 0.6150014999999911 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf312 2.01610051566 0 98.459999 1.8450015000000022 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf313 1.78817265464 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf314 1.522932631 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf315 1.75663902891 0 99.199997 0.7350045000000023 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf316 2.00016617632 0 99.519997 0.5700029999999942 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf317 1.72017310656 0 99.580002 0.5099980000000045 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf318 1.7756263212 0 99.32 0.5550000000000068 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf319 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf320 1.72017310656 0 99.440002 0.6499979999999909 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf321 1.7756263212 0 99.199997 0.7350045000000023 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf322 1.99590274244 0 99.099998 0.8850029999999975 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf323 1.97610564729 0 99.080002 0.9149970000000067 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf324 1.73515484904 0 99.5 0.5899999999999977 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf325 1.99590274244 0 99.580002 0.5099980000000045 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf326 1.99590274244 0 98.940002 1.1249969999999863 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf327 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf328 1.72333900478 0 99.660004 0.42999599999999705 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf329 1.78817265464 0 98.940002 1.1249969999999863 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf330 1.78817265464 0 99.18 0.7649999999999864 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf331 1.72333900478 0 99.639999 0.45000099999999466 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf332 1.7756263212 0 99.300003 0.5849954999999909 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf333 1.78817265464 0 99.199997 0.7350045000000023 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf334 1.78817265464 0 99.32 0.5550000000000068 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf335 1.78817265464 0 99.099998 0.8850029999999975 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf336 2.00016617632 0 99.0 1.0349999999999966 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf337 2.00016617632 0 98.059998 2.445003000000007 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf338 2.00016617632 0 98.580002 1.6649970000000067 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf339 1.72333900478 0 99.620003 0.4699970000000008 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf340 2.00016617632 0 98.419998 1.9050029999999865 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf341 2.00016617632 0 98.0 2.5349999999999966 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf342 1.75663902891 0 99.160004 0.7949939999999955 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf343 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf344 2.00016617632 0 98.400002 1.9349969999999956 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf345 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf346 1.72017310656 0 99.300003 0.5849954999999909 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf347 1.73515484904 0 99.660004 0.42999599999999705 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf348 2.01610051566 0 97.699997 2.9850045000000023 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf349 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf350 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf351 1.78817265464 0 98.980003 1.064995500000002 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf352 2.01610051566 0 98.0 2.5349999999999966 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf353 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf354 1.72333900478 0 99.68 0.4099999999999909 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf355 1.73515484904 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf356 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf357 1.70544786131 0 99.620003 0.4699970000000008 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf358 1.7756263212 0 99.279999 0.6150014999999911 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf359 1.7756263212 0 98.82 1.3050000000000068 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf360 1.7756263212 0 99.379997 0.46500449999999205 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf361 1.99590274244 0 99.599998 0.4900019999999984 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf362 1.78817265464 0 99.279999 0.6150014999999911 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf363 1.70544786131 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf364 1.70544786131 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf365 1.78817265464 0 97.760002 2.8949969999999965 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf366 1.72017310656 0 99.620003 0.4699970000000008 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf367 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf368 2.00016617632 0 99.339996 0.5250059999999976 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf369 1.7756263212 0 99.160004 0.7949939999999955 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf370 1.78817265464 0 99.080002 0.9149970000000067 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf371 1.75663902891 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf372 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf373 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf374 1.72333900478 0 99.599998 0.4900019999999984 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf375 1.99590274244 0 97.940002 2.6249969999999863 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf376 1.78817265464 0 98.980003 1.064995500000002 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf377 1.7756263212 0 99.279999 0.6150014999999911 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf378 1.522932631 0 99.540001 0.549998999999994 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf379 1.5 0 99.699997 0.39000300000000154 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf380 1.78817265464 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf381 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf382 2.01610051566 0 98.120003 2.354995500000001 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf383 1.72017310656 0 99.300003 0.5849954999999909 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf384 1.70544786131 0 99.639999 0.45000099999999466 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf385 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf386 1.73515484904 0 99.639999 0.45000099999999466 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf387 1.72017310656 0 99.620003 0.4699970000000008 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf388 1.51382277464 0 99.660004 0.42999599999999705 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf389 1.7756263212 0 99.300003 0.5849954999999909 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf390 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf391 1.97610564729 0 99.599998 0.4900019999999984 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf392 1.72333900478 0 99.620003 0.4699970000000008 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf393 1.51382277464 0 99.620003 0.4699970000000008 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf394 2.00016617632 0 98.980003 1.064995500000002 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf395 2.00016617632 0 99.660004 0.42999599999999705 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf396 1.73515484904 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf397 1.77226558474 0 99.32 0.5550000000000068 -1 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf398 1.75663902891 0 99.32 0.5550000000000068 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf399 1.77226558474 0 98.980003 1.064995500000002 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf400 2.00016617632 0 98.040001 2.474998499999991 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf401 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf402 1.78817265464 0 99.300003 0.5849954999999909 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf403 1.75663902891 0 99.199997 0.7350045000000023 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf404 2.01610051566 0 98.839996 1.2750059999999976 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf405 2.01610051566 0 98.18 2.2649999999999864 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf406 1.7756263212 0 98.400002 1.9349969999999956 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf407 1.73515484904 0 99.559998 0.5300020000000046 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf408 1.72333900478 0 99.580002 0.5099980000000045 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf409 1.72333900478 0 99.639999 0.45000099999999466 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf410 1.73515484904 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_pareto_confs_batch220.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_pareto_confs_batch220.txt index 2580226146..2e3185632c 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_pareto_confs_batch220.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_pareto_confs_batch220.txt @@ -8,889 +8,889 @@ conf1 2.01610051566 0 99.400002 0.6899979999999971 ----- +++++ conf2 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf3 2.00016617632 0 99.68 0.4099999999999909 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf4 2.00016617632 0 99.660004 0.42999599999999705 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf5 1.97610564729 0 99.599998 0.4900019999999984 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf6 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf7 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf8 2.00016617632 0 99.239998 0.6750029999999967 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf9 2.00016617632 0 99.199997 0.7350045000000023 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf10 1.99590274244 0 99.099998 0.8850029999999975 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf11 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf12 1.99590274244 0 99.540001 0.549998999999994 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf13 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf14 1.99590274244 0 99.580002 0.5099980000000045 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf15 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf16 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf17 2.00016617632 0 99.379997 0.46500449999999205 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf18 1.99590274244 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf19 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf20 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf21 1.99590274244 0 99.440002 0.6499979999999909 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf22 1.99590274244 0 99.260002 0.6449969999999965 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf23 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf24 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf25 2.00016617632 0 99.519997 0.5700029999999942 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf26 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf27 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf28 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf29 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf30 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf31 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf32 1.97610564729 0 99.080002 0.9149970000000067 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf33 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf34 2.00016617632 0 99.620003 0.4699970000000008 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf35 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf36 1.99590274244 0 99.599998 0.4900019999999984 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf37 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf38 2.00016617632 0 99.339996 0.5250059999999976 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf39 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf40 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf41 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf42 1.99590274244 0 99.459999 0.6300010000000015 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf43 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf44 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf45 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf46 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf47 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf48 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf49 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf50 2.00016617632 0 98.400002 1.9349969999999956 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf51 2.01610051566 0 98.540001 1.724998499999991 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf52 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf53 2.00016617632 0 99.660004 0.42999599999999705 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf54 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf55 1.97610564729 0 99.599998 0.4900019999999984 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf56 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf57 1.99590274244 0 99.099998 0.8850029999999975 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf58 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf59 1.97610564729 0 99.080002 0.9149970000000067 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf60 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf61 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf62 2.01610051566 0 98.839996 1.2750059999999976 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf63 1.99590274244 0 98.940002 1.1249969999999863 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf64 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf65 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf66 2.00016617632 0 99.239998 0.6750029999999967 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf67 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf68 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf69 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf70 1.99590274244 0 99.440002 0.6499979999999909 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf71 2.00016617632 0 99.339996 0.5250059999999976 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf72 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf73 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf74 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf75 1.99590274244 0 99.260002 0.6449969999999965 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf76 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf77 1.97610564729 0 98.440002 1.8749969999999863 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf78 2.01610051566 0 98.440002 1.8749969999999863 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf79 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf80 1.97610564729 0 98.480003 1.814995500000002 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf81 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf82 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf83 1.99590274244 0 99.540001 0.549998999999994 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf84 2.00016617632 0 99.199997 0.7350045000000023 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf85 1.97610564729 0 98.440002 1.8749969999999863 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf86 2.00016617632 0 99.0 1.0349999999999966 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf87 1.99590274244 0 98.519997 1.7550044999999912 -1 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 35 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf88 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf89 2.01610051566 0 97.760002 2.8949969999999965 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf90 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf91 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf92 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf93 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf94 2.01610051566 0 98.480003 1.814995500000002 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf95 2.01610051566 0 98.540001 1.724998499999991 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf96 2.01610051566 0 97.82 2.805000000000007 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf97 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf98 2.01610051566 0 98.459999 1.8450015000000022 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf99 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf100 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf101 2.01610051566 0 97.699997 2.9850045000000023 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf102 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf103 2.01610051566 0 98.0 2.5349999999999966 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf104 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf105 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf106 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf107 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf108 2.01610051566 0 98.120003 2.354995500000001 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf109 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf110 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf111 2.01610051566 0 98.839996 1.2750059999999976 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf112 2.01610051566 0 98.18 2.2649999999999864 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_promise_confs_batch220_multi.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_promise_confs_batch220_multi.txt index 54755c06b2..db97ae4b9d 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_promise_confs_batch220_multi.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_promise_confs_batch220_multi.txt @@ -8,13961 +8,13961 @@ conf1 1 0 99.69 0 ----- +++++ conf1 3.86059861244 0 99.5865002 0.5034997999999945 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf2 2.47778695782 0 99.401499275 0.6885007249999916 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf3 2.50228643329 0 99.302000275 0.5819995875000004 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf4 3.92040413524 0 99.545499375 0.5445006249999921 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf5 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf6 2.47778695782 0 99.04700025 0.964499625000002 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf7 1.99590274244 0 99.099998 0.8850029999999975 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf8 2.00016617632 0 99.68 0.4099999999999909 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf9 3.77195447337 0 99.631001025 0.4589989749999944 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf10 2.47778695782 0 99.2549994 0.6525008999999926 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf11 6.14799414721 0 99.50250035 0.5874996499999924 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf12 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf13 2.47778695782 0 99.38799915 0.4530012749999983 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf14 3.84474688915 0 99.5614995 0.5285005000000013 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf15 3.34244261096 0 99.594499925 0.4955000750000039 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf16 2.50228643329 0 99.4655008 0.6244991999999968 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf17 2.47778695782 0 98.991999975 1.0470000374999984 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf18 6.14799414721 0 99.41749905 0.6725009499999942 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf19 3.34244261096 0 99.51449975 0.5755002499999989 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf20 3.92040413524 0 99.5669999 0.5230000999999987 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf21 3.80166404425 0 99.543499725 0.546500274999994 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf22 3.85964385182 0 99.4755007 0.6144993 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf23 2.47778695782 0 99.31599945 0.5610008249999865 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf24 2.47778695782 0 99.405499825 0.6845001749999909 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf25 2.50228643329 0 99.37349895 0.4747515749999991 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf26 6.61857279171 0 99.494500325 0.5954996749999936 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf27 3.84474688915 0 99.417499625 0.6725003749999928 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf28 2.47778695782 0 99.026998925 0.9945016124999952 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf29 2.50228643329 0 99.370499525 0.4792507124999972 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf30 6.30106886729 0 99.5040001 0.5859998999999988 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf31 1.99590274244 0 99.540001 0.549998999999994 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf32 3.7862916372 0 99.5900006 0.49999940000000154 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf33 3.86059861244 0 99.612000375 0.4779996250000039 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf34 3.92040413524 0 99.558499875 0.5315001250000023 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf35 3.86059861244 0 99.56849965 0.521500349999991 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf36 5.02870270579 0 99.267000425 0.6344993624999873 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf37 6.14799414721 0 99.50099985 0.5890001499999983 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf38 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf39 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf40 2.50228643329 0 99.027499025 0.9937514625000006 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf41 6.14799414721 0 99.554999675 0.535000324999993 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf42 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf43 3.84474688915 0 99.7254995 0.3645004999999998 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf44 3.92040413524 0 99.57699975 0.5130002499999989 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf45 6.14799414721 0 99.5009999 0.5890001000000012 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf46 2.47778695782 0 99.376999825 0.46950026249999866 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf47 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf48 3.85964385182 0 99.269500375 0.6307494374999862 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf49 3.13161472572 0 99.5865002 0.5034997999999945 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf50 6.30106886729 0 99.25849995 0.6472500749999952 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf51 5.92620561097 0 99.54949965 0.5405003499999964 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf52 2.47124761202 0 99.10999975 0.8700003749999965 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf53 2.01610051566 0 99.0 1.0349999999999966 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf54 3.38717868509 0 99.33499915 0.5325012749999942 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf55 3.80166404425 0 99.513499725 0.5765002749999951 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf56 2.44096937877 0 99.3519992 0.5070012000000048 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf57 3.92040413524 0 99.475999775 0.6140002249999924 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf58 3.92040413524 0 99.3189995 0.5565007499999908 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf59 2.47778695782 0 99.479500975 0.6104990250000043 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf60 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf61 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf62 2.81322619695 0 99.540999075 0.5490009249999958 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf63 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf64 3.86059861244 0 99.615501 0.474499000000003 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf65 3.86059861244 0 99.585999975 0.5040000250000048 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf66 3.86059861244 0 99.515000025 0.5749999749999916 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf67 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf68 5.79060658268 0 99.425999975 0.6640000250000014 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf69 2.50228643329 0 98.901499925 1.1827501125000026 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf70 2.47124761202 0 98.265500075 2.136749887499988 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf71 3.77195447337 0 99.63050095 0.45949904999999946 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf72 2.50228643329 0 98.91149985 1.167750224999999 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf73 5.02870270579 0 99.46900055 0.6209994499999937 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf74 2.00016617632 0 98.580002 1.6649970000000067 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf75 2.57685599488 0 99.371499325 0.4777510124999935 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf76 2.51187737029 0 99.390999175 0.6990008249999932 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf77 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf78 6.14799414721 0 99.49949955 0.5905004500000018 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf79 2.47124761202 0 99.21099945 0.7185008249999925 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf80 6.30106886729 0 99.517499475 0.5725005250000038 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf81 5.92620561097 0 99.542999375 0.547000625000004 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf82 1.99590274244 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf83 3.33055390722 0 99.418500225 0.6714997749999952 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf84 6.61857279171 0 99.50099985 0.5890001499999983 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf85 3.80166404425 0 99.521499575 0.5685004249999907 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf86 4.4071692756 0 99.419500275 0.6704997249999934 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf87 2.47124761202 0 99.37749925 0.46875112499999716 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf88 5.79060658268 0 99.565499625 0.5245003749999967 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf89 6.7963162944 0 99.453000325 0.6369996749999928 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf90 1.99590274244 0 98.440002 1.8749969999999863 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf91 6.14799414721 0 99.43249995 0.6575000500000044 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf92 3.85964385182 0 99.473000375 0.6169996249999997 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf93 2.50228643329 0 98.878998975 1.216501537499994 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf94 5.02870270579 0 99.442000525 0.6479994750000003 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf95 6.57211871555 0 99.33249975 0.5362503750000016 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf96 3.38717868509 0 99.336998575 0.5295021375000033 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf97 3.92040413524 0 99.5710002 0.5189997999999975 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf98 4.64385542353 0 99.51799975 0.5720002499999964 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf99 2.47124761202 0 99.01849985 1.007250225 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf100 2.5439518228 0 99.4895 0.600499999999991 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf101 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf102 6.10789096832 0 99.341499625 0.5227505625000006 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf103 2.50228643329 0 99.438500325 0.651499674999991 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf104 2.50228643329 0 99.4235 0.6664999999999935 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf105 2.50228643329 0 99.00049975 1.034250374999992 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf106 6.10789096832 0 99.590500925 0.49949907499999713 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf107 3.80166404425 0 99.5274999 0.5625001000000026 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf108 6.7963162944 0 99.495500375 0.5944996249999918 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf109 6.14799414721 0 99.495000225 0.5949997749999995 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf110 3.86059861244 0 99.5875 0.5024999999999921 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf111 2.00016617632 0 98.980003 1.064995500000002 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf112 4.90489779833 0 99.4235004 0.6664996000000031 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf113 4.93072604433 0 99.519499875 0.5705001250000038 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf114 2.47124761202 0 99.22599905 0.6960014249999986 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf115 2.57685599488 0 99.38249875 0.4612518750000021 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf116 2.47778695782 0 99.387499325 0.4537510125000068 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf117 3.92040413524 0 99.4889999 0.6010001000000017 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf118 2.47124761202 0 99.3574979 0.49875314999999887 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf119 4.90489779833 0 99.338499075 0.5272513874999945 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf120 2.57685599488 0 98.909499725 1.1707504124999915 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf121 3.7862916372 0 99.425500575 0.6644994249999968 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf122 5.02870270579 0 99.51799975 0.5720002499999964 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf123 6.10789096832 0 99.41449975 0.6755002499999933 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf124 3.80166404425 0 99.5164998 0.5735001999999924 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf125 6.30106886729 0 99.469500325 0.6204996749999993 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf126 4.93072604433 0 99.522999375 0.567000625 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf127 3.86059861244 0 99.618499975 0.4715000249999918 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf128 2.55088214386 0 99.382999475 0.46050078749998846 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf129 6.14799414721 0 99.513499725 0.5765002749999951 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf130 6.30106886729 0 99.449999925 0.6400000749999976 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf131 2.50228643329 0 98.300999875 2.083500187499993 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf132 3.33055390722 0 99.563499475 0.5265005249999973 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf133 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf134 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf135 1.99590274244 0 98.139999 2.325001499999992 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf136 3.85964385182 0 99.47600045 0.613999549999997 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf137 2.78229733114 0 99.5514998 0.5385001999999958 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf138 3.77195447337 0 99.3254996 0.5467505999999958 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf139 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf140 2.81322619695 0 99.5814995 0.508500499999991 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf141 2.57685599488 0 99.417499875 0.6725001249999935 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf142 4.93072604433 0 99.5164998 0.5735001999999924 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf143 2.50228643329 0 99.466500825 0.6234991750000006 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf144 1.99590274244 0 99.260002 0.6449969999999965 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf145 2.77405457184 0 99.573499525 0.5165004749999952 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf146 3.70186719231 0 99.722999925 0.3670000750000014 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf147 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf148 3.92040413524 0 99.5589999 0.5310000999999943 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf149 6.61857279171 0 99.414999925 0.6750000749999941 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf150 3.38717868509 0 99.56849935 0.5215006500000016 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf151 3.34244261096 0 99.616000475 0.47399952499999076 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf152 5.33920664205 0 99.4435006 0.6464994000000047 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf153 2.5439518228 0 98.314999825 2.062500262499995 -1 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 35 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf154 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf155 2.50228643329 0 98.911000225 1.168499662500004 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf156 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf157 2.50228643329 0 99.39599955 0.6940004499999987 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf158 3.38717868509 0 99.46750085 0.6224991500000044 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf159 2.81322619695 0 99.461000775 0.6289992249999955 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf160 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf161 2.50228643329 0 99.007999825 1.0230002624999983 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf162 3.92040413524 0 99.5534999 0.5365000999999922 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf163 2.50228643329 0 99.43850055 0.6514994499999972 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf164 2.01610051566 0 98.0 2.5349999999999966 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf165 2.01610051566 0 97.879997 2.715004499999992 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf166 6.30106886729 0 99.2590004 0.6464993999999891 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf167 2.50228643329 0 98.98099975 1.0635003750000038 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf168 2.50228643329 0 99.372998825 0.4755017625000022 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf169 5.02870270579 0 99.4364997 0.6535002999999989 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf170 2.50228643329 0 97.885499575 2.7067506375000008 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf171 2.01610051566 0 97.900002 2.6849969999999956 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf172 2.50228643329 0 98.90100005 1.1834999250000067 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf173 2.50228643329 0 98.326998875 2.044501687499995 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf174 3.92040413524 0 99.4730004 0.6169995999999941 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf175 5.02870270579 0 99.25650025 0.6502496249999936 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf176 2.50228643329 0 99.40049935 0.6895006499999937 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf177 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf178 2.50228643329 0 99.375999275 0.47100108749999947 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf179 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf180 2.81322619695 0 99.341498825 0.5227517624999933 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf181 6.30106886729 0 99.451500425 0.6384995749999917 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf182 2.50228643329 0 99.094000075 0.8939998875000015 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf183 3.92040413524 0 99.31999995 0.5550000750000024 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf184 2.50228643329 0 99.373498975 0.47475153749999066 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf185 2.50228643329 0 99.47200125 0.6179987499999925 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf186 3.92040413524 0 99.582000275 0.5079997249999991 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf187 3.85964385182 0 99.5244993 0.5655006999999955 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf188 6.30106886729 0 99.459000325 0.6309996749999925 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf189 2.01610051566 0 97.699997 2.9850045000000023 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf190 6.30106886729 0 99.472500875 0.6174991250000034 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf191 2.50228643329 0 97.651000575 3.0584991375 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf192 2.01610051566 0 98.440002 1.8749969999999863 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf193 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf194 3.92040413524 0 99.551 0.5389999999999958 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf195 2.50228643329 0 99.423000075 0.6669999249999933 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf196 3.85964385182 0 99.475500725 0.6144992749999943 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf197 2.01610051566 0 98.540001 1.724998499999991 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf198 2.01610051566 0 98.18 2.2649999999999864 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf199 6.30106886729 0 99.517000075 0.5729999249999992 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf200 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf201 3.85964385182 0 99.437000325 0.6529996749999981 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf202 6.30106886729 0 99.437500275 0.6524997249999928 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf203 2.50228643329 0 99.188499275 0.7522510874999995 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf204 2.01610051566 0 97.760002 2.8949969999999965 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf205 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf206 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf207 6.30106886729 0 99.440500775 0.6494992249999939 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf208 2.47778695782 0 98.98650045 1.0552493250000055 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf209 3.92040413524 0 99.559499725 0.5305002750000029 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf210 6.14799414721 0 99.559999775 0.5300002250000034 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf211 4.93072604433 0 99.5089997 0.5810002999999938 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf212 4.90489779833 0 99.332999675 0.535500487500002 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf213 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf214 2.47124761202 0 99.316000575 0.5609991374999908 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf215 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf216 3.92040413524 0 99.5844996 0.5055003999999969 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf217 2.47124761202 0 99.1250007 0.847498949999995 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf218 2.47124761202 0 99.381998825 0.4620017625000017 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf219 6.14799414721 0 99.42000025 0.6699997499999967 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf220 6.10789096832 0 99.416 0.6740000000000009 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf221 5.92620561097 0 99.5324998 0.5575002000000012 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf222 2.50228643329 0 99.3659991 0.4860013500000022 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf223 2.81322619695 0 99.59350015 0.49649985000000074 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf224 3.85964385182 0 99.272000525 0.626999212500003 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf225 6.14799414721 0 99.46450015 0.6254998499999914 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf226 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf227 6.14799414721 0 99.49450015 0.5954998500000045 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf228 2.81322619695 0 99.334998975 0.5325015374999893 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf229 6.14799414721 0 99.5029999 0.5870000999999917 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf230 3.86059861244 0 99.57099965 0.5190003499999932 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf231 6.30106886729 0 99.4249999 0.6650000999999947 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf232 3.77195447337 0 99.64550045 0.44449954999999763 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf233 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf234 6.30106886729 0 99.455500425 0.634499575000001 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf235 2.44096937877 0 99.4455007 0.6444993000000011 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf236 5.02870270579 0 99.430500375 0.6594996250000037 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf237 2.77405457184 0 99.431500125 0.6584998749999983 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf238 6.30106886729 0 99.259000225 0.6464996625000055 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf239 3.92040413524 0 99.331999675 0.5370004874999879 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf240 3.34244261096 0 99.561499925 0.528500074999991 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf241 6.10789096832 0 99.344999375 0.5175009374999959 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf242 4.93072604433 0 99.524499625 0.5655003749999935 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf243 2.47778695782 0 99.0039994 1.0290008999999998 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf244 6.14799414721 0 99.501000025 0.5889999750000016 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf245 3.86059861244 0 99.590000225 0.4999997750000006 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf246 2.81322619695 0 99.585999875 0.5040001249999989 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf247 3.84474688915 0 99.570500075 0.5194999249999995 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf248 3.38717868509 0 99.3339993 0.5340010499999934 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf249 2.00016617632 0 99.379997 0.46500449999999205 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf250 2.50228643329 0 99.2840001 0.6089998499999965 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf251 6.10789096832 0 99.57099985 0.5190001499999909 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf252 5.02870270579 0 99.26750015 0.6337497749999912 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf253 2.50228643329 0 99.388999175 0.45150123750000404 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf254 3.71567552873 0 99.558999425 0.5310005750000016 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf255 1.99590274244 0 99.599998 0.4900019999999984 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf256 3.33055390722 0 99.567500075 0.5224999249999996 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf257 2.50228643329 0 99.387999625 0.4530005624999873 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf258 2.47778695782 0 99.325999725 0.5460004124999926 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf259 3.80166404425 0 99.533999275 0.5560007249999984 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf260 2.00016617632 0 99.0 1.0349999999999966 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf261 1.99590274244 0 98.940002 1.1249969999999863 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf262 2.47778695782 0 99.37499845 0.47250232499998646 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf263 1.99590274244 0 99.099998 0.8850029999999975 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf264 2.00016617632 0 98.980003 1.064995500000002 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf265 4.78704248134 0 99.542999225 0.5470007749999951 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf266 3.86059861244 0 99.6060007 0.48399930000000213 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf267 3.86059861244 0 99.614001325 0.47599867499999393 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf268 3.86059861244 0 99.5959999 0.4940001000000024 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf269 5.02870270579 0 99.524999575 0.5650004250000024 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf270 3.34244261096 0 99.583499925 0.5065000749999996 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf271 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf272 3.92040413524 0 99.54199885 0.5480011499999989 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf273 3.86059861244 0 99.582500175 0.5074998249999908 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf274 3.33055390722 0 99.724499675 0.36550032499999363 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf275 6.14799414721 0 99.504500025 0.5854999749999991 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf276 2.47124761202 0 98.819999675 1.3050004874999885 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf277 6.10789096832 0 99.40649895 0.6835010499999982 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf278 6.10789096832 0 99.577999475 0.5120005249999992 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf279 2.47778695782 0 99.30150025 0.5827496249999911 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf280 2.50228643329 0 99.3719988 0.4770017999999965 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf281 2.50228643329 0 98.29700015 2.089499774999993 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf282 4.93072604433 0 99.470500775 0.6194992249999928 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf283 2.50228643329 0 99.439000875 0.6509991249999928 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf284 6.14799414721 0 99.46549985 0.6245001499999973 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf285 3.85964385182 0 99.43350045 0.6564995500000009 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf286 6.30106886729 0 99.4350001 0.6549999000000014 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf287 2.50228643329 0 98.327000375 2.0444994375000007 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf288 3.38717868509 0 99.326000275 0.545999587499999 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf289 2.50228643329 0 99.420999875 0.669000124999991 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf290 2.50228643329 0 99.467501025 0.6224989749999935 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf291 3.92040413524 0 99.583000475 0.506999524999992 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf292 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf293 2.50228643329 0 99.38349915 0.4597512749999879 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf294 2.81322619695 0 99.4725008 0.6174991999999918 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf295 1.99590274244 0 99.540001 0.549998999999994 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf296 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf297 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf298 3.85964385182 0 99.4755009 0.6144990999999976 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf299 3.80166404425 0 99.524499125 0.5655008749999922 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf300 3.86059861244 0 99.5054997 0.5845002999999963 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf301 6.30106886729 0 99.45100085 0.638999149999998 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf302 5.02870270579 0 99.4765002 0.6134997999999939 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf303 2.47778695782 0 99.0105 1.0192500000000067 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf304 3.92040413524 0 99.562000025 0.5279999749999945 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf305 6.30106886729 0 99.50299955 0.5870004499999993 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf306 3.84474688915 0 99.729999675 0.3600003249999958 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf307 6.14799414721 0 99.5119996 0.578000400000002 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf308 4.90489779833 0 99.58749965 0.5025003499999997 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf309 3.7862916372 0 99.33799965 0.528000524999996 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf310 6.30106886729 0 99.2670004 0.6344993999999957 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf311 6.30106886729 0 99.435500625 0.6544993749999947 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf312 2.47778695782 0 99.331499275 0.5377510874999984 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf313 2.77405457184 0 99.580000425 0.5099995750000034 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf314 6.14799414721 0 99.54800025 0.5419997499999966 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf315 2.50228643329 0 98.917000175 1.1594997374999991 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf316 2.01610051566 0 98.440002 1.8749969999999863 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf317 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf318 3.92040413524 0 99.55649935 0.533500650000002 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf319 2.47778695782 0 99.265 0.6374999999999957 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf320 2.47124761202 0 98.317499 2.0587514999999996 -1 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 35 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf321 3.33055390722 0 99.427499975 0.6625000249999943 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf322 2.50228643329 0 99.00349965 1.0297505250000043 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf323 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf324 6.30106886729 0 99.4665007 0.6234993000000003 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf325 4.93072604433 0 99.518999775 0.5710002250000002 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf326 5.92620561097 0 99.542499525 0.5475004750000011 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf327 3.86059861244 0 99.55699975 0.533000249999995 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf328 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf329 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf330 2.50228643329 0 99.3734996 0.47475059999999303 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf331 6.14799414721 0 99.50849975 0.5815002499999992 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf332 2.81322619695 0 99.33399895 0.5340015750000049 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf333 2.50228643329 0 99.375999975 0.47100003749999786 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf334 3.92040413524 0 99.546999 0.5430009999999982 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf335 6.30106886729 0 99.43900055 0.6509994499999948 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf336 3.85964385182 0 99.4830001 0.6069998999999996 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf337 2.01610051566 0 98.480003 1.814995500000002 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf338 5.02870270579 0 99.470500275 0.6194997249999915 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf339 2.50228643329 0 98.750500325 1.40924951249999 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf340 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf341 2.81322619695 0 99.540999625 0.549000375 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf342 5.02870270579 0 99.518000275 0.5719997249999921 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf343 6.30106886729 0 99.462500575 0.6274994249999907 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf344 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf345 5.02870270579 0 99.259499975 0.6457500375000009 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf346 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf347 2.50228643329 0 98.88849885 1.2022517249999893 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf348 2.50228643329 0 99.401999825 0.6880001749999934 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf349 2.01610051566 0 97.760002 2.8949969999999965 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf350 2.50228643329 0 98.31600045 2.0609993249999903 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf351 3.85964385182 0 99.441 0.6489999999999952 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf352 2.50228643329 0 99.0334995 0.9847507499999892 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf353 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf354 2.50228643329 0 99.443500275 0.6464997249999925 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf355 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf356 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf357 2.81322619695 0 99.57849965 0.51150035 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf358 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf359 2.50228643329 0 99.42499965 0.665000349999994 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf360 2.50228643329 0 98.90749945 1.173750824999992 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf361 3.38717868509 0 99.3249995 0.5475007499999904 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf362 2.50228643329 0 99.01050035 1.0192494749999952 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf363 2.50228643329 0 99.37949865 0.46575202499999335 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf364 2.50228643329 0 98.987000375 1.0544994375000059 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf365 2.50228643329 0 99.467501025 0.6224989749999935 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf366 3.38717868509 0 99.533499175 0.5565008249999949 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf367 5.02870270579 0 99.465000275 0.6249997250000036 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf368 2.50228643329 0 98.8994997 1.1857504499999862 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf369 2.50228643329 0 99.095000075 0.8924998874999943 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf370 6.30106886729 0 99.465499825 0.6245001750000029 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf371 5.02870270579 0 99.480000725 0.6099992750000013 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf372 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf373 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf374 3.85964385182 0 99.462000625 0.627999374999996 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf375 3.38717868509 0 99.583499875 0.5065001249999966 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf376 2.01610051566 0 97.699997 2.9850045000000023 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf377 2.01610051566 0 98.839996 1.2750059999999976 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf378 6.30106886729 0 99.504999975 0.5850000249999937 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf379 6.30106886729 0 99.258500625 0.647249062500002 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf380 2.50228643329 0 99.290000375 0.5999994374999886 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf381 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf382 5.02870270579 0 99.439500025 0.6504999749999968 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf383 2.50228643329 0 97.8925002 2.6962496999999956 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf384 3.38717868509 0 99.55950045 0.5304995499999962 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf385 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf386 6.30106886729 0 99.435499425 0.6545005749999945 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf387 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf388 2.47124761202 0 99.21699935 0.7095009750000045 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf389 2.00016617632 0 99.239998 0.6750029999999967 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf390 6.14799414721 0 99.512499525 0.5775004750000022 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf391 6.14799414721 0 99.496000075 0.593999925 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf392 6.30106886729 0 99.256499475 0.6502507874999992 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf393 4.90489779833 0 99.41399975 0.6760002499999956 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf394 2.50228643329 0 99.37949885 0.46575172499998985 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf395 2.00016617632 0 99.519997 0.5700029999999942 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf396 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf397 6.10789096832 0 99.40799955 0.6820004499999982 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf398 4.93072604433 0 99.4785008 0.6114991999999916 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf399 2.47778695782 0 99.044499725 0.9682504125000051 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf400 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf401 3.92040413524 0 99.549000125 0.5409998749999915 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf402 4.78704248134 0 99.557500025 0.5324999750000018 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf403 3.92040413524 0 99.339499575 0.5257506374999892 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf404 3.77195447337 0 99.64400015 0.4459998500000012 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf405 2.78229733114 0 99.515498725 0.5745012749999973 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf406 1.99590274244 0 99.580002 0.5099980000000045 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf407 3.38717868509 0 99.539999575 0.5500004250000018 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf408 2.50228643329 0 99.28500105 0.6074984249999886 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf409 6.30106886729 0 99.4274997 0.6625002999999993 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf410 1.99590274244 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf411 3.86059861244 0 99.61500045 0.47499955000000116 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf412 3.7862916372 0 99.422500375 0.6674996249999993 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf413 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf414 2.81322619695 0 99.592500325 0.4974996749999946 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf415 6.10789096832 0 99.3419997 0.5220004499999931 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf416 6.14799414721 0 99.461500375 0.6284996249999978 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf417 3.85964385182 0 99.4770005 0.6129994999999951 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf418 6.14799414721 0 99.508999875 0.5810001249999971 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf419 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf420 2.50228643329 0 99.3769985 0.4695022499999979 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf421 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf422 2.00016617632 0 99.339996 0.5250059999999976 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf423 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf424 6.30106886729 0 99.4700003 0.6199997000000025 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf425 6.30106886729 0 99.45350025 0.636499749999993 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf426 3.84474688915 0 99.56299955 0.5270004499999971 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf427 2.81322619695 0 99.578000175 0.5119998249999981 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf428 2.47124761202 0 99.12450055 0.8482491750000065 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf429 4.90489779833 0 99.5894995 0.5005004999999955 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf430 2.47778695782 0 99.179000625 0.7664990624999959 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf431 3.86059861244 0 99.49899945 0.5910005499999983 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf432 2.50228643329 0 99.2835007 0.6097489499999895 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf433 3.38717868509 0 99.567999775 0.5220002249999937 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf434 6.14799414721 0 99.548998875 0.5410011250000025 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf435 2.77405457184 0 99.736499925 0.3535000749999938 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf436 4.93072604433 0 99.5544993 0.5355006999999944 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf437 4.90489779833 0 99.329499025 0.5407514624999905 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf438 5.02870270579 0 99.475999575 0.6140004249999947 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf439 6.30106886729 0 99.5009997 0.5890003000000036 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf440 3.80166404425 0 99.50849925 0.5815007499999979 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf441 6.30106886729 0 99.434000275 0.6559997249999953 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf442 6.14799414721 0 99.41999995 0.6700000499999931 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf443 3.85964385182 0 99.47000065 0.6199993499999948 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf444 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf445 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf446 6.14799414721 0 99.50449985 0.5855001499999958 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf447 3.38717868509 0 99.582499575 0.5075004249999978 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf448 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf449 3.38717868509 0 99.543499525 0.5465004749999963 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf450 2.00016617632 0 98.220001 2.204998500000002 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf451 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf452 6.30106886729 0 99.442500275 0.6474997249999973 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf453 2.01610051566 0 99.0 1.0349999999999966 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf454 2.50228643329 0 99.292000475 0.5969992875000045 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf455 2.44096937877 0 99.3534991 0.5047513500000065 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf456 2.50228643329 0 98.90349965 1.1797505249999958 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf457 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf458 2.47778695782 0 99.26350005 0.6397499249999896 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf459 4.93072604433 0 99.5080002 0.5819997999999998 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf460 2.81322619695 0 99.578999925 0.5110000749999927 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf461 2.00016617632 0 99.339996 0.5250059999999976 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf462 3.92040413524 0 99.556999575 0.5330004249999917 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf463 2.50228643329 0 99.371499375 0.477750937499998 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf464 6.14799414721 0 99.41999945 0.6700005499999918 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf465 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf466 2.47124761202 0 98.272500275 2.1262495874999985 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf467 3.85964385182 0 99.4570007 0.632999300000003 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf468 6.10789096832 0 99.412499925 0.6775000749999919 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf469 6.30106886729 0 99.4435008 0.6464992000000024 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf470 2.47124761202 0 98.813000175 1.315499737499998 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf471 3.86059861244 0 99.49750055 0.5924994499999997 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf472 3.92040413524 0 99.553499375 0.5365006249999965 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf473 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf474 2.50228643329 0 99.40199945 0.6880005499999925 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf475 1.99590274244 0 99.440002 0.6499979999999909 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf476 6.14799414721 0 99.4554998 0.6345001999999994 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf477 2.50228643329 0 99.3669987 0.48450195000000207 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf478 2.77405457184 0 99.7349998 0.35500020000000065 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf479 2.01610051566 0 98.120003 2.354995500000001 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf480 2.50228643329 0 99.00699965 1.0245005250000006 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf481 6.14799414721 0 99.540999725 0.5490002749999917 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf482 3.33055390722 0 99.7384998 0.35150019999999815 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf483 3.7862916372 0 99.594000025 0.49599997499999804 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf484 6.10789096832 0 99.580000225 0.5099997749999915 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf485 2.47124761202 0 99.50349965 0.5865003500000029 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf486 2.50228643329 0 99.19349965 0.7447505249999864 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf487 6.30106886729 0 99.457000125 0.6329998750000044 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf488 3.38717868509 0 99.471999825 0.6180001750000003 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf489 1.97610564729 0 99.080002 0.9149970000000067 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf490 3.86059861244 0 99.607000275 0.48299972499999344 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf491 6.10789096832 0 99.334499875 0.5332501874999878 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf492 3.80166404425 0 99.413499775 0.6765002249999924 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf493 5.02870270579 0 99.448500525 0.6414994749999977 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf494 2.47124761202 0 99.21199905 0.7170014249999923 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf495 2.00016617632 0 98.980003 1.064995500000002 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf496 6.14799414721 0 99.503000075 0.586999924999995 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf497 2.50228643329 0 99.098501075 0.8872483874999944 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf498 5.02870270579 0 99.274000025 0.6239999624999868 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf499 4.90489779833 0 99.58150035 0.5084996499999989 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf500 2.01610051566 0 98.459999 1.8450015000000022 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf501 3.85964385182 0 99.44050065 0.6494993499999936 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf502 2.50228643329 0 99.362999025 0.49050146250000637 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf503 3.86059861244 0 99.607500325 0.482499674999994 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf504 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf505 5.02870270579 0 99.439999975 0.6500000249999914 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf506 2.47778695782 0 99.422999625 0.667000374999995 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf507 3.80166404425 0 99.517499625 0.5725003749999985 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf508 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf509 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf510 2.01610051566 0 97.82 2.805000000000007 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf511 2.50228643329 0 99.437500525 0.6524994749999934 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf512 2.01610051566 0 97.760002 2.8949969999999965 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf513 6.30106886729 0 99.505500025 0.5844999749999943 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf514 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf515 5.02870270579 0 99.473500475 0.6164995250000033 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf516 3.38717868509 0 99.340999725 0.5235004124999918 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf517 3.92040413524 0 99.55600015 0.5339998499999951 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf518 2.81322619695 0 99.582499875 0.5075001250000014 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf519 5.02870270579 0 99.434999875 0.6550001249999952 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf520 5.02870270579 0 99.471500325 0.6184996750000039 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf521 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf522 2.01610051566 0 98.480003 1.814995500000002 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf523 3.38717868509 0 99.560999675 0.5290003249999927 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf524 3.85964385182 0 99.438000125 0.6519998749999957 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf525 2.50228643329 0 99.382499475 0.46125078749999204 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf526 2.50228643329 0 99.464500775 0.625499224999993 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf527 3.38717868509 0 99.577499825 0.5125001749999939 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf528 2.50228643329 0 98.99950025 1.0357496250000011 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf529 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf530 2.50228643329 0 99.387999225 0.45300116249999434 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf531 2.01610051566 0 98.839996 1.2750059999999976 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf532 2.01610051566 0 97.879997 2.715004499999992 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf533 2.50228643329 0 99.0314993 0.9877510500000071 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf534 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf535 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf536 2.50228643329 0 98.74550055 1.4167491749999925 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf537 6.30106886729 0 99.462 0.6279999999999945 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf538 3.92040413524 0 99.32449935 0.548250975000002 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf539 3.85964385182 0 99.470000825 0.6199991749999981 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf540 2.50228643329 0 99.37150005 0.4777499250000048 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf541 6.30106886729 0 99.44350025 0.6464997499999982 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf542 6.30106886729 0 99.456000225 0.6339997750000009 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf543 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf544 2.01610051566 0 98.18 2.2649999999999864 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf545 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf546 5.02870270579 0 99.516499975 0.5735000249999956 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf547 2.01610051566 0 97.620003 3.104995500000001 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf548 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf549 3.38717868509 0 99.546999725 0.5430002749999915 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf550 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf551 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf552 3.92040413524 0 99.542999425 0.5470005749999928 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf553 3.38717868509 0 99.472000875 0.6179991249999915 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf554 2.50228643329 0 99.0099999 1.0200001500000013 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf555 2.01610051566 0 98.120003 2.354995500000001 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf556 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf557 2.01610051566 0 98.540001 1.724998499999991 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf558 3.85964385182 0 99.272500475 0.626249287499995 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf559 6.30106886729 0 99.43700055 0.6529994500000044 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf560 2.50228643329 0 98.316000025 2.0609999625000057 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf561 3.38717868509 0 99.5814995 0.508500499999991 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf562 2.01610051566 0 98.0 2.5349999999999966 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf563 2.50228643329 0 97.960999825 2.593500262499994 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf564 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf565 6.30106886729 0 99.276000625 0.6209990625000046 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf566 2.50228643329 0 98.91400005 1.1639999249999988 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf567 3.92040413524 0 99.577500375 0.5124996249999981 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf568 2.50228643329 0 99.423500325 0.6664996749999915 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf569 3.85964385182 0 99.477500425 0.6124995749999954 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf570 2.47778695782 0 99.413500025 0.676499974999993 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf571 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf572 6.14799414721 0 99.504500025 0.5854999749999991 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf573 3.84474688915 0 99.570000775 0.5199992250000008 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf574 2.47778695782 0 99.013500075 1.0147498875000025 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf575 2.47124761202 0 99.385499425 0.4567508624999874 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf576 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf577 2.81322619695 0 99.545499275 0.5445007250000004 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf578 5.02870270579 0 99.44550065 0.6444993499999981 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf579 2.50228643329 0 99.1870008 0.7544987999999861 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf580 2.78229733114 0 99.51649875 0.5735012500000011 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf581 5.92620561097 0 99.553999775 0.5360002250000037 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf582 2.50228643329 0 99.368498525 0.4822522124999864 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf583 3.38717868509 0 99.547499275 0.5425007249999908 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf584 2.78229733114 0 99.57900015 0.5109998499999989 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf585 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf586 2.50228643329 0 99.375499 0.4717514999999892 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf587 2.81322619695 0 99.47250045 0.6174995499999995 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf588 2.50228643329 0 99.28450095 0.6082485750000046 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf589 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf590 6.14799414721 0 99.5135004 0.5764995999999997 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf591 6.14799414721 0 99.4965 0.5935000000000002 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf592 3.34244261096 0 99.561499825 0.5285001749999992 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf593 2.50228643329 0 99.029999175 0.990001237499996 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf594 6.30106886729 0 99.442000175 0.6479998249999938 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf595 2.47778695782 0 99.453499975 0.636500024999998 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf596 3.92040413524 0 99.56400005 0.5259999499999936 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf597 6.10789096832 0 99.339999275 0.5250010875000015 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf598 2.47124761202 0 99.3599983 0.49500254999999527 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf599 2.50228643329 0 99.469500725 0.6204992749999946 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf600 2.81322619695 0 99.589500425 0.5004995750000006 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf601 6.30106886729 0 99.2495001 0.6607498499999878 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf602 3.80166404425 0 99.522498725 0.5675012749999923 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf603 2.77405457184 0 99.742999825 0.34700017499999947 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf604 6.14799414721 0 99.5524999 0.5375000999999969 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf605 6.30106886729 0 99.446000725 0.6439992749999931 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf606 4.93072604433 0 99.47500045 0.6149995500000017 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf607 3.38717868509 0 99.574000075 0.515999924999997 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf608 3.33055390722 0 99.73199955 0.35800045000000014 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf609 3.92040413524 0 99.575000325 0.5149996749999929 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf610 4.90489779833 0 99.31799945 0.5580008249999935 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf611 6.30106886729 0 99.466000575 0.6239994250000024 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf612 6.14799414721 0 99.46900035 0.620999649999996 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf613 6.10789096832 0 99.4139997 0.6760002999999927 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf614 3.7862916372 0 99.3359989 0.5310016499999861 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf615 2.47778695782 0 99.5030006 0.5869994000000048 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf616 3.34244261096 0 99.577500025 0.5124999749999916 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf617 3.92040413524 0 99.5005001 0.5894999000000013 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf618 6.14799414721 0 99.420000025 0.6699999750000046 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf619 2.47778695782 0 99.4209994 0.6690005999999983 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf620 2.47124761202 0 99.224499975 0.6982500374999958 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf621 6.30106886729 0 99.428499925 0.6615000750000007 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf622 3.85964385182 0 99.435500575 0.6544994249999917 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf623 2.00016617632 0 99.660004 0.42999599999999705 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf624 1.99590274244 0 99.260002 0.6449969999999965 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf625 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf626 3.84474688915 0 99.7199993 0.37000069999999996 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf627 3.86059861244 0 99.49699995 0.5930000499999949 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf628 3.92040413524 0 99.320000325 0.5549995125000038 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf629 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf630 3.85964385182 0 99.523999725 0.5660002750000018 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf631 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf632 6.30106886729 0 99.512499875 0.5775001249999946 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf633 2.78229733114 0 99.576000025 0.5139999749999987 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf634 6.10789096832 0 99.57550015 0.5144998500000014 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf635 4.90489779833 0 99.329999825 0.5400002624999942 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf636 3.34244261096 0 99.58599985 0.5040001500000045 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf637 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf638 6.30106886729 0 99.455499575 0.6345004249999932 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf639 3.85964385182 0 99.263500425 0.639749362499991 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf640 6.14799414721 0 99.422499575 0.6675004249999944 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf641 2.47778695782 0 99.417500075 0.6724999249999911 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf642 6.14799414721 0 99.490500175 0.5994998250000038 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf643 2.50228643329 0 99.374999575 0.4725006374999907 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf644 2.50228643329 0 99.011500375 1.017749437500001 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf645 1.99590274244 0 98.940002 1.1249969999999863 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf646 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf647 2.47778695782 0 99.503999925 0.5860000749999955 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf648 6.10789096832 0 99.33900025 0.5264996249999996 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf649 6.10789096832 0 99.4024996 0.687500399999999 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf650 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf651 2.50228643329 0 98.8884997 1.202250450000001 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf652 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf653 3.92040413524 0 99.47850045 0.6114995499999992 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf654 3.77195447337 0 99.641501475 0.4484985249999994 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf655 5.02870270579 0 99.2675 0.6337499999999991 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf656 6.30106886729 0 99.470500425 0.6194995750000004 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf657 2.47778695782 0 99.435000375 0.6549996249999964 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf658 6.30106886729 0 99.43799965 0.652000350000003 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf659 2.50228643329 0 98.3299994 2.0400008999999883 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf660 6.14799414721 0 99.508499725 0.5815002750000048 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf661 3.38717868509 0 99.33649845 0.5302523250000064 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf662 2.81322619695 0 99.5795002 0.5104997999999995 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf663 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf664 2.47124761202 0 99.494999875 0.5950001249999929 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf665 3.80166404425 0 99.47750075 0.6124992499999934 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf666 3.86059861244 0 99.58 0.5099999999999995 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf667 6.30106886729 0 99.441500025 0.6484999750000014 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf668 1.99590274244 0 98.440002 1.8749969999999863 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf669 3.80166404425 0 99.525999075 0.5640009249999963 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf670 2.47124761202 0 99.323999 0.5490014999999957 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf671 2.50228643329 0 98.73550015 1.4317497750000072 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf672 6.30106886729 0 99.265999825 0.636000262500005 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf673 6.14799414721 0 99.4690007 0.6209993000000026 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf674 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf675 4.78704248134 0 99.554999925 0.5350000749999936 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf676 6.14799414721 0 99.559499325 0.5305006749999933 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf677 2.50228643329 0 99.29100025 0.5984996250000023 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf678 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf679 2.50228643329 0 99.366998475 0.4845022874999927 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf680 2.50228643329 0 97.97399995 2.5740000749999865 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf681 2.01610051566 0 98.459999 1.8450015000000022 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf682 3.92040413524 0 99.5735001 0.5164998999999938 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf683 2.50228643329 0 99.032999775 0.9855003375000067 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf684 2.50228643329 0 99.280000725 0.6149989125000062 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf685 2.50228643329 0 98.88899965 1.201500524999993 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf686 2.50228643329 0 99.195000075 0.7424998875000028 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf687 2.50228643329 0 99.018499825 1.007250262499987 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf688 2.01610051566 0 97.699997 2.9850045000000023 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf689 3.85964385182 0 99.27300055 0.6254991749999874 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf690 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf691 6.30106886729 0 99.468499525 0.6215004749999992 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf692 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf693 3.85964385182 0 99.4565008 0.6334991999999972 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf694 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf695 6.30106886729 0 99.447499925 0.6425000749999953 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf696 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf697 6.30106886729 0 99.501999875 0.5880001250000021 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf698 5.02870270579 0 99.467500675 0.6224993250000012 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf699 2.50228643329 0 99.435499925 0.6545000749999957 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf700 2.50228643329 0 99.4214999 0.6685000999999972 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf701 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf702 3.85964385182 0 99.47450035 0.6154996499999982 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf703 2.81322619695 0 99.4615009 0.6284990999999934 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf704 2.50228643329 0 98.3229992 2.0505011999999994 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf705 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf706 3.85964385182 0 99.436500175 0.6534998249999916 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf707 6.30106886729 0 99.43250055 0.6574994499999974 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf708 3.92040413524 0 99.559498975 0.530501025000001 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf709 2.01610051566 0 97.82 2.805000000000007 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf710 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf711 2.50228643329 0 97.63800065 3.077999025000004 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf712 2.01610051566 0 97.760002 2.8949969999999965 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf713 3.92040413524 0 99.318500375 0.5572494374999977 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf714 2.50228643329 0 98.90349935 1.1797509749999904 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf715 3.92040413524 0 99.5524996 0.5375003999999933 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf716 6.30106886729 0 99.2610001 0.6434998499999907 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf717 6.30106886729 0 99.446500275 0.6434997249999924 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf718 3.38717868509 0 99.567999775 0.5220002249999937 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf719 2.50228643329 0 99.101000225 0.883499662499986 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf720 3.85964385182 0 99.478500775 0.6114992249999972 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf721 2.47778695782 0 99.39299885 0.6970011499999998 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf722 3.85964385182 0 99.528499175 0.5615008250000045 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf723 6.14799414721 0 99.460000125 0.6299998750000043 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf724 2.50228643329 0 99.46000075 0.6299992499999917 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf725 3.92040413524 0 99.332999525 0.5355007124999887 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf726 3.80166404425 0 99.570999825 0.5190001749999965 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf727 2.47778695782 0 99.303500625 0.5797490624999995 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf728 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf729 3.92040413524 0 99.4830003 0.6069996999999973 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf730 6.30106886729 0 99.519000275 0.5709997250000015 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf731 5.02870270579 0 99.5234998 0.5665002000000016 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf732 3.86059861244 0 99.611500325 0.47849967500000334 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf733 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf734 2.50228643329 0 99.033498825 0.9847517625000037 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf735 3.38717868509 0 99.3329991 0.5355013500000041 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf736 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf737 6.14799414721 0 99.50600015 0.5839998499999922 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf738 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf739 3.34244261096 0 99.5624996 0.5275004000000024 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf740 2.47124761202 0 99.36199835 0.4920024750000067 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf741 6.14799414721 0 99.5104998 0.5795001999999926 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf742 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf743 2.81322619695 0 99.333499225 0.534751162500001 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf744 1.99590274244 0 99.599998 0.4900019999999984 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf745 3.38717868509 0 99.5830001 0.5069998999999911 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf746 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf747 2.81322619695 0 99.587500225 0.5024997749999983 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf748 3.85964385182 0 99.462500975 0.6274990250000002 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf749 6.30106886729 0 99.44100045 0.6489995499999935 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf750 3.86059861244 0 99.5044995 0.5855005000000034 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf751 6.10789096832 0 99.337499975 0.5287500374999965 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf752 3.92040413524 0 99.580999475 0.509000524999999 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf753 2.50228643329 0 99.394999575 0.6950004249999978 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf754 6.14799414721 0 99.420499375 0.6695006249999921 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf755 2.47778695782 0 99.374499425 0.47325086250000226 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf756 6.30106886729 0 99.43549985 0.6545001499999984 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf757 2.50228643329 0 99.43300005 0.6569999499999938 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf758 3.84474688915 0 99.4280003 0.6619997000000041 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf759 5.92620561097 0 99.533499625 0.5565003749999932 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf760 6.10789096832 0 99.575999775 0.5140002249999981 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf761 6.10789096832 0 99.40649965 0.6835003499999971 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf762 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf763 6.14799414721 0 99.505500225 0.584499774999992 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf764 2.47778695782 0 99.322999075 0.5505013874999989 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf765 6.30106886729 0 99.4675 0.6224999999999966 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf766 4.90489779833 0 99.32749945 0.5437508249999894 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf767 3.85964385182 0 99.432499925 0.6575000749999959 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf768 6.10789096832 0 99.578999975 0.5110000249999956 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf769 4.93072604433 0 99.5179994 0.5720006000000041 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf770 2.00016617632 0 99.239998 0.6750029999999967 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf771 3.86059861244 0 99.557999225 0.5320007749999945 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf772 2.78229733114 0 99.614500525 0.4754994750000009 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf773 5.02870270579 0 99.432000675 0.6579993250000001 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf774 4.93072604433 0 99.517499825 0.5725001749999962 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf775 2.47778695782 0 99.4125002 0.6774998000000011 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf776 3.77195447337 0 99.6390002 0.4509997999999996 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf777 2.81322619695 0 99.538499125 0.5515008749999964 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf778 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf779 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf780 3.92040413524 0 99.4845006 0.6054993999999937 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf781 6.30106886729 0 99.501500475 0.5884995249999975 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf782 2.77405457184 0 99.736499225 0.3535007749999949 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf783 3.84474688915 0 99.42049985 0.669500149999999 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf784 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf785 2.50228643329 0 99.36849925 0.48225112499999767 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf786 6.30106886729 0 99.465499775 0.6245002249999999 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf787 3.85964385182 0 99.481500875 0.608499125000003 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf788 3.80166404425 0 99.42100005 0.6689999499999942 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf789 4.93072604433 0 99.511499675 0.5785003250000017 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf790 6.14799414721 0 99.5054997 0.5845002999999963 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf791 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf792 2.44096937877 0 99.4470006 0.6429994000000022 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf793 2.47778695782 0 98.260500175 2.1442497374999903 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf794 6.10789096832 0 99.33799975 0.5280003750000049 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf795 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf796 2.44096937877 0 99.353998575 0.5040021374999881 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf797 2.50228643329 0 99.42099945 0.6690005500000012 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf798 6.30106886729 0 99.442500625 0.6474993750000039 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf799 6.30106886729 0 99.428500225 0.6614997750000043 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf800 3.92040413524 0 99.56549965 0.5245003499999911 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf801 2.47124761202 0 99.185500075 0.7567498875000069 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf802 3.92040413524 0 99.53350015 0.556499850000003 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf803 6.14799414721 0 99.5049996 0.5850003999999928 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf804 2.01610051566 0 98.459999 1.8450015000000022 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf805 2.00016617632 0 98.379997 1.965004499999992 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf806 6.14799414721 0 99.464499625 0.6255003749999958 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf807 1.99590274244 0 98.660004 1.5449939999999955 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf808 3.85964385182 0 99.53049925 0.5595007499999923 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf809 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf810 2.01610051566 0 98.120003 2.354995500000001 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf811 3.86059861244 0 99.591500125 0.4984998750000017 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf812 2.50228643329 0 99.382499425 0.4612508624999876 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf813 2.47124761202 0 98.818999725 1.3065004125000002 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf814 3.86059861244 0 99.6200006 0.4699994000000004 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf815 2.73595882486 0 99.6375013 0.45249870000000103 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf816 6.14799414721 0 99.545999725 0.5440002749999963 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf817 3.7862916372 0 99.3424988 0.5212517999999946 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf818 2.77405457184 0 99.4264998 0.6635001999999958 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf819 2.50228643329 0 99.0920006 0.896999099999988 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf820 5.02870270579 0 99.476000675 0.6139993250000032 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf821 3.92040413524 0 99.3214991 0.5527513500000012 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf822 2.47124761202 0 98.535999325 1.7310010124999877 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf823 2.78229733114 0 99.551999525 0.5380004749999984 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf824 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf825 3.85964385182 0 99.473999875 0.6160001249999937 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf826 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf827 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf828 6.30106886729 0 99.46050055 0.6294994499999916 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf829 2.50228643329 0 99.4435005 0.6464994999999988 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf830 2.81322619695 0 99.538499475 0.551500525000003 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf831 2.50228643329 0 98.230000275 2.1899995875000045 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf832 2.50228643329 0 99.466500825 0.6234991750000006 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf833 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf834 6.30106886729 0 99.4185003 0.6714996999999926 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf835 2.50228643329 0 98.889999625 1.2000005624999943 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf836 3.38717868509 0 99.478001025 0.6119989750000002 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf837 5.02870270579 0 99.467500025 0.622499974999991 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf838 2.50228643329 0 99.389499275 0.45075108749998805 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf839 2.81322619695 0 99.472000525 0.6179994749999992 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf840 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf841 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf842 3.85964385182 0 99.52599925 0.5640007499999996 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf843 2.50228643329 0 99.29100025 0.5984996250000023 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf844 3.92040413524 0 99.5589993 0.5310007000000013 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf845 3.85964385182 0 99.47500025 0.6149997500000041 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf846 3.85964385182 0 99.469500475 0.620499524999994 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf847 3.92040413524 0 99.48150035 0.6084996499999932 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf848 3.92040413524 0 99.536 0.5539999999999964 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf849 2.50228643329 0 99.4420006 0.6479993999999977 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf850 2.50228643329 0 97.654500925 3.0532486125000062 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf851 6.30106886729 0 99.4430001 0.6469998999999916 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf852 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf853 5.02870270579 0 99.458000775 0.6319992249999956 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf854 3.38717868509 0 99.329499675 0.5407504875000058 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf855 2.50228643329 0 98.3030002 2.080499699999997 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf856 3.92040413524 0 99.33299985 0.5355002250000069 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf857 2.01610051566 0 97.900002 2.6849969999999956 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf858 2.01610051566 0 97.620003 3.104995500000001 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf859 3.38717868509 0 99.57400005 0.5159999500000026 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf860 3.92040413524 0 99.542999475 0.5470005249999957 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf861 2.01610051566 0 98.540001 1.724998499999991 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf862 6.30106886729 0 99.504999775 0.585000224999996 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf863 2.50228643329 0 99.096000725 0.8909989125000024 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf864 2.50228643329 0 99.368999175 0.48150123749999807 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf865 6.30106886729 0 99.24349955 0.6697506750000031 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf866 5.02870270579 0 99.48200025 0.6079997499999991 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf867 5.02870270579 0 99.43700025 0.6529997500000008 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf868 2.50228643329 0 99.370499525 0.4792507124999972 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf869 2.50228643329 0 99.41749965 0.6725003500000014 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf870 3.85964385182 0 99.431000575 0.658999424999999 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf871 6.30106886729 0 99.46350045 0.6264995499999998 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf872 5.02870270579 0 99.512999425 0.5770005749999939 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf873 5.02870270579 0 99.269500575 0.630749137500004 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf874 2.50228643329 0 99.180999675 0.7635004875000035 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf875 2.50228643329 0 99.409499525 0.6805004749999967 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf876 2.50228643329 0 98.9994999 1.0357501499999913 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf877 2.47778695782 0 99.42899995 0.6610000499999927 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf878 3.38717868509 0 99.483500225 0.6064997749999975 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf879 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf880 3.33055390722 0 99.4320002 0.6579997999999933 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf881 3.33055390722 0 99.5704994 0.519500599999995 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf882 3.86059861244 0 99.610500025 0.4794999750000045 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf883 2.81322619695 0 99.577000375 0.5129996250000005 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf884 1.99590274244 0 99.459999 0.6300010000000015 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf885 2.47778695782 0 99.40549945 0.6845005500000042 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf886 3.38717868509 0 99.569 0.5209999999999951 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf887 2.00016617632 0 99.239998 0.6750029999999967 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf888 2.50228643329 0 99.43799995 0.6520000499999924 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf889 2.50228643329 0 99.473001075 0.6169989249999986 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf890 6.14799414721 0 99.555999325 0.5340006749999958 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf891 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf892 4.90489779833 0 99.4159999 0.674000099999995 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf893 4.90489779833 0 99.3299996 0.5400006000000062 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf894 4.93072604433 0 99.52499955 0.5650004499999938 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf895 3.86059861244 0 99.506999625 0.5830003749999918 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf896 3.86059861244 0 99.582499975 0.5075000249999931 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf897 3.92040413524 0 99.542500075 0.5474999249999911 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf898 2.47778695782 0 99.442000175 0.6479998249999938 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf899 2.50228643329 0 99.28750065 0.603749024999999 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf900 6.30106886729 0 99.43350035 0.656499649999995 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf901 6.14799414721 0 99.49950045 0.5904995499999984 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf902 5.02870270579 0 99.47200005 0.6179999499999923 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf903 3.34244261096 0 99.613500575 0.4764994249999944 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf904 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf905 3.7862916372 0 99.41650005 0.6734999500000015 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf906 6.14799414721 0 99.455000325 0.6349996749999974 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf907 2.50228643329 0 99.4335006 0.6564993999999956 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf908 6.14799414721 0 99.50550005 0.5844999500000029 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf909 2.47124761202 0 99.319999725 0.555000412499993 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf910 2.50228643329 0 99.42550005 0.6644999500000012 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf911 6.10789096832 0 99.56899955 0.5210004499999968 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf912 3.85964385182 0 99.48000055 0.609999449999998 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf913 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf914 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf915 2.44096937877 0 99.01699955 1.0095006750000053 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf916 4.90489779833 0 99.58049985 0.5095001500000024 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf917 2.47124761202 0 99.49900015 0.5909998499999972 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf918 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf919 4.93072604433 0 99.5589997 0.5310002999999966 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf920 5.02870270579 0 99.473000225 0.6169997749999908 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf921 3.92040413524 0 99.55299925 0.5370007499999986 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf922 3.85964385182 0 99.53349935 0.5565006499999982 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf923 6.30106886729 0 99.472000025 0.6179999749999979 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf924 2.50228643329 0 99.182499825 0.7612502625000062 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf925 3.86059861244 0 99.60850085 0.4814991499999991 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf926 6.14799414721 0 99.412 0.6779999999999916 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf927 6.30106886729 0 99.4530003 0.6369996999999984 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf928 6.30106886729 0 99.499999825 0.5900001749999945 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf929 6.10789096832 0 99.407000125 0.6829998750000016 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf930 2.47778695782 0 99.322999475 0.5505007874999919 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf931 6.30106886729 0 99.419000225 0.6709997749999929 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf932 3.80166404425 0 99.48450075 0.6054992500000026 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf933 2.50228643329 0 98.997999775 1.0380003375000015 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf934 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf935 6.30106886729 0 99.467999775 0.6220002250000022 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf936 2.47778695782 0 98.96700135 1.0844979749999908 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf937 3.77195447337 0 99.637001 0.4529989999999998 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf938 6.30106886729 0 99.512999625 0.5770003749999916 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf939 2.47778695782 0 98.9210002 1.1534997000000047 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf940 1.99590274244 0 98.139999 2.325001499999992 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf941 2.47124761202 0 99.3819992 0.4620012000000031 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf942 2.50228643329 0 99.393999775 0.6960002250000002 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf943 2.47778695782 0 99.4064995 0.6835005000000024 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf944 6.14799414721 0 99.41999975 0.6700002499999954 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf945 3.71567552873 0 99.5574991 0.5325008999999966 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf946 4.93072604433 0 99.57149935 0.5185006500000015 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf947 2.00016617632 0 98.419998 1.9050029999999865 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf948 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf949 3.92040413524 0 99.55049875 0.539501249999995 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf950 3.34244261096 0 99.5865 0.5034999999999968 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf951 3.38717868509 0 99.541499675 0.5485003250000006 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf952 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf953 2.50228643329 0 98.90750005 1.1737499250000027 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf954 2.44096937877 0 98.241500025 2.1727499625000064 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf955 2.01610051566 0 98.459999 1.8450015000000022 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf956 2.50228643329 0 99.439500375 0.6504996250000034 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf957 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf958 3.38717868509 0 99.5725004 0.5174996000000022 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf959 2.50228643329 0 99.001500125 1.0327498124999863 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf960 2.44096937877 0 99.376498775 0.470251837499994 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf961 3.92040413524 0 99.5484993 0.5415006999999946 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf962 1.99590274244 0 98.139999 2.325001499999992 -1 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 35 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf963 3.7862916372 0 99.584000125 0.5059998749999949 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf964 2.47124761202 0 99.026499175 0.9952512374999998 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf965 3.80166404425 0 99.516499625 0.5735003750000033 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf966 3.85964385182 0 99.477000125 0.6129998749999942 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf967 1.99590274244 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf968 3.86059861244 0 99.56150015 0.5284998499999972 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf969 6.14799414721 0 99.508499775 0.5815002249999935 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf970 2.50228643329 0 98.90099985 1.1835002249999889 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf971 1.99590274244 0 98.660004 1.5449939999999955 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf972 1.99590274244 0 99.260002 0.6449969999999965 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf973 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf974 2.47124761202 0 98.536999 1.729501500000005 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf975 2.50228643329 0 98.23449995 2.1832500749999966 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf976 6.10789096832 0 99.406999775 0.683000224999995 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf977 6.14799414721 0 99.545499575 0.544500425000004 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf978 3.86059861244 0 99.61050155 0.4794984500000027 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf979 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf980 2.50228643329 0 99.4215 0.6685000000000031 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf981 3.7862916372 0 99.42750005 0.6624999499999916 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf982 3.34244261096 0 99.562499425 0.5275005749999991 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf983 2.01610051566 0 98.18 2.2649999999999864 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf984 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf985 2.00016617632 0 98.980003 1.064995500000002 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf986 5.92620561097 0 99.5314992 0.5585007999999988 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf987 2.78229733114 0 99.553500025 0.5364999749999925 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf988 2.47778695782 0 99.183000125 0.7604998124999867 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf989 3.86059861244 0 99.609000625 0.48099937500000467 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf990 3.86059861244 0 99.5019997 0.5880002999999988 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf991 6.30106886729 0 99.457000075 0.6329999250000015 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf992 3.84474688915 0 99.568499675 0.5215003249999995 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf993 6.14799414721 0 99.46 0.630000000000004 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf994 3.38717868509 0 99.3379988 0.5280018000000055 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf995 6.10789096832 0 99.577499425 0.5125005749999986 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf996 3.86059861244 0 99.5895003 0.5004997000000003 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf997 2.50228643329 0 99.37899965 0.4665005250000007 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf998 3.85964385182 0 99.475000975 0.6149990249999974 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf999 2.01610051566 0 98.480003 1.814995500000002 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1000 3.92040413524 0 99.559499525 0.530500474999991 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1001 6.30106886729 0 99.427999925 0.6620000750000031 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1002 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1003 2.50228643329 0 98.336499375 2.030250937499993 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1004 3.85964385182 0 99.4305002 0.6594998000000004 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1005 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1006 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1007 2.01610051566 0 99.0 1.0349999999999966 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1008 2.50228643329 0 99.2740006 0.623999100000006 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1009 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1010 5.02870270579 0 99.26250005 0.6412499249999968 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1011 2.50228643329 0 99.369499075 0.4807513875000069 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1012 5.02870270579 0 99.44500045 0.6449995500000029 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1013 2.50228643329 0 97.6395004 3.0757493999999923 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1014 5.02870270579 0 99.474500525 0.6154994750000015 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1015 3.92040413524 0 99.492000325 0.5979996749999913 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1016 3.92040413524 0 99.331499525 0.5377507124999994 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1017 6.30106886729 0 99.426500225 0.6634997749999997 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1018 3.38717868509 0 99.543499225 0.5465007749999927 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1019 3.38717868509 0 99.335999 0.531001499999995 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1020 3.92040413524 0 99.5434993 0.5465007000000043 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1021 2.50228643329 0 99.002499975 1.0312500374999871 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1022 3.38717868509 0 99.586500475 0.5034995250000037 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1023 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1024 5.02870270579 0 99.51149985 0.5785001499999908 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1025 3.85964385182 0 99.2685002 0.6322496999999885 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1026 2.81322619695 0 99.546999225 0.5430007750000044 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1027 3.38717868509 0 99.47600085 0.6139991499999923 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1028 2.81322619695 0 99.588999925 0.5010000750000018 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1029 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1030 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1031 2.01610051566 0 98.839996 1.2750059999999976 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1032 2.50228643329 0 98.9885 1.0522499999999937 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1033 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1034 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1035 6.30106886729 0 99.516000025 0.573999975000001 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1036 6.30106886729 0 99.2645006 0.6382490999999888 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1037 6.30106886729 0 99.45900045 0.6309995499999929 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1038 2.50228643329 0 99.369999225 0.48000116249999536 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1039 2.50228643329 0 99.37249935 0.47625097499999924 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1040 5.02870270579 0 99.4795001 0.6104999000000021 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1041 2.50228643329 0 99.469000975 0.6209990249999976 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1042 6.30106886729 0 99.45850035 0.6314996500000035 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1043 2.81322619695 0 99.33899865 0.5265020250000063 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1044 3.85964385182 0 99.531499475 0.5585005249999938 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1045 3.85964385182 0 99.470500225 0.6194997750000028 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1046 1.99590274244 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1047 1.99590274244 0 99.440002 0.6499979999999909 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1048 2.47124761202 0 99.19199955 0.7470006749999882 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1049 2.00016617632 0 99.620003 0.4699970000000008 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1050 2.50228643329 0 99.43900005 0.6509999499999936 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1051 2.50228643329 0 99.035499225 0.9817511625000037 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1052 6.10789096832 0 99.575000075 0.5149999249999923 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1053 2.47124761202 0 99.26700025 0.6344996250000037 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1054 2.44096937877 0 99.011999775 1.0170003374999865 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1055 2.01610051566 0 99.0 1.0349999999999966 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1056 4.93072604433 0 99.5204999 0.5695000999999934 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1057 3.92040413524 0 99.576999675 0.5130003250000016 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1058 6.14799414721 0 99.550499275 0.5395007249999907 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1059 6.10789096832 0 99.41649995 0.6735000499999956 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1060 6.14799414721 0 99.45850075 0.6314992499999988 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1061 2.47778695782 0 99.001499775 1.0327503374999978 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1062 2.00016617632 0 99.199997 0.7350045000000023 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1063 2.50228643329 0 99.419999775 0.670000225000004 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1064 2.50228643329 0 99.01399965 1.014000524999993 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1065 3.85964385182 0 99.457500475 0.6324995249999944 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1066 4.90489779833 0 99.413499575 0.6765004249999947 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1067 2.50228643329 0 99.363998825 0.4890017625000027 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1068 2.47778695782 0 99.4510009 0.6389991000000009 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1069 1.97610564729 0 99.080002 0.9149970000000067 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1070 2.44096937877 0 99.3414992 0.5227511999999948 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1071 3.92040413524 0 99.46549975 0.6245002499999913 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1072 6.14799414721 0 99.502499625 0.5875003749999991 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1073 5.02870270579 0 99.26700025 0.6344996250000037 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1074 6.10789096832 0 99.327999975 0.5430000375000006 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1075 3.7862916372 0 99.425500025 0.6644999749999926 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1076 3.80166404425 0 99.519499 0.5705010000000016 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1077 2.47778695782 0 99.3090006 0.5714990999999898 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1078 2.78229733114 0 99.5504996 0.5395004000000029 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1079 6.14799414721 0 99.5105 0.5795000000000045 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1080 2.47124761202 0 99.356998675 0.49950198749999686 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1081 3.92040413524 0 99.551499975 0.538500024999999 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1082 3.92040413524 0 99.5609997 0.5290003000000013 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1083 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1084 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1085 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1086 3.80166404425 0 99.5354992 0.5545007999999939 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1087 6.30106886729 0 99.42449985 0.6655001499999941 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1088 6.30106886729 0 99.510999925 0.5790000750000047 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1089 2.47778695782 0 99.405999625 0.6840003749999909 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1090 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1091 3.38717868509 0 99.54649955 0.5435004500000048 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1092 6.14799414721 0 99.4960001 0.5939998999999944 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1093 6.30106886729 0 99.46200015 0.6279998500000034 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1094 6.30106886729 0 99.263000125 0.6404998124999892 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1095 6.30106886729 0 99.469499875 0.620500125000001 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1096 2.81322619695 0 99.576999575 0.5130004249999957 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1097 3.38717868509 0 99.3319991 0.5370013499999899 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1098 3.86059861244 0 99.6120008 0.47799919999999363 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1099 5.02870270579 0 99.467000225 0.622999774999991 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1100 4.93072604433 0 99.51999945 0.5700005499999975 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1101 3.86059861244 0 99.5900002 0.499999799999992 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1102 2.50228643329 0 99.0934995 0.8947507500000071 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1103 6.14799414721 0 99.41949945 0.6705005499999942 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1104 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1105 3.7862916372 0 99.331499775 0.5377503375000003 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1106 1.99590274244 0 99.459999 0.6300010000000015 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1107 5.92620561097 0 99.541999675 0.5480003249999982 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1108 2.77405457184 0 99.5785001 0.5114998999999983 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1109 2.50228643329 0 99.39249935 0.6975006500000035 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1110 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1111 5.02870270579 0 99.5159995 0.5740004999999911 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1112 6.14799414721 0 99.465999875 0.6240001250000035 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1113 2.50228643329 0 99.415999925 0.6740000750000036 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1114 3.34244261096 0 99.60100035 0.488999649999991 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1115 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1116 3.92040413524 0 99.328499625 0.5422505624999872 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1117 3.85964385182 0 99.52999935 0.5600006500000007 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1118 2.50228643329 0 99.098500425 0.8872493625000004 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1119 6.30106886729 0 99.458000075 0.6319999249999967 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1120 2.00016617632 0 99.519997 0.5700029999999942 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1121 1.97610564729 0 99.080002 0.9149970000000067 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1122 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1123 6.30106886729 0 99.507499575 0.5825004250000007 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1124 3.27579123647 0 99.64050075 0.4494992499999967 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1125 6.14799414721 0 99.49900015 0.5909998499999972 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1126 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1127 1.99590274244 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1128 1.99590274244 0 99.260002 0.6449969999999965 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1129 3.38717868509 0 99.333498075 0.5347528875000052 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1130 3.86059861244 0 99.5950008 0.49499920000000375 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1131 2.78229733114 0 99.580499625 0.5095003749999961 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1132 3.34244261096 0 99.5804993 0.5095006999999981 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1133 2.01610051566 0 99.0 1.0349999999999966 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1134 3.86059861244 0 99.592499975 0.49750002500000223 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1135 3.86059861244 0 99.61300085 0.4769991499999918 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1136 6.30106886729 0 99.429000375 0.6609996249999966 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1137 1.97610564729 0 98.440002 1.8749969999999863 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1138 2.47778695782 0 99.402999625 0.687000374999991 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1139 3.80166404425 0 99.51099975 0.5790002500000014 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1140 2.81322619695 0 99.5854998 0.504500200000004 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1141 2.50228643329 0 99.038999575 0.9765006374999885 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1142 6.30106886729 0 99.4325004 0.6574996000000027 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1143 6.10789096832 0 99.4115002 0.6784997999999917 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1144 6.10789096832 0 99.332999725 0.5355004125000065 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1145 2.01610051566 0 98.459999 1.8450015000000022 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1146 6.14799414721 0 99.428500075 0.6614999249999954 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1147 3.80166404425 0 99.567500275 0.5224997249999973 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1148 2.50228643329 0 98.29800005 2.0879999249999983 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1149 3.86059861244 0 99.4985004 0.5914996000000002 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1150 3.33055390722 0 99.43350015 0.6564998499999973 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1151 6.30106886729 0 99.477000475 0.6129995250000008 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1152 6.14799414721 0 99.495000225 0.5949997749999995 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1153 2.47778695782 0 98.9710005 1.078499249999993 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1154 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1155 2.44096937877 0 99.014999325 1.012501012499989 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1156 4.93072604433 0 99.483500225 0.6064997749999975 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1157 2.47778695782 0 99.42549995 0.6645000499999952 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1158 2.78229733114 0 99.5529994 0.5370005999999933 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1159 2.78229733114 0 99.605500375 0.48449962499999233 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1160 3.92040413524 0 99.55899965 0.5310003499999937 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1161 5.02870270579 0 99.4565008 0.6334991999999972 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1162 2.01610051566 0 98.540001 1.724998499999991 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1163 2.47124761202 0 99.3154995 0.5617507499999945 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1164 6.10789096832 0 99.579999725 0.5100002750000044 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1165 6.30106886729 0 99.262000125 0.6419998124999964 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1166 3.86059861244 0 99.5529991 0.5370009000000039 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1167 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1168 5.02870270579 0 99.253000425 0.6554993625000023 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1169 3.92040413524 0 99.581499875 0.508500124999992 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1170 2.50228643329 0 99.467500525 0.6224994749999923 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1171 2.50228643329 0 98.75049975 1.409250374999992 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1172 3.38717868509 0 99.5780001 0.5119999000000007 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1173 6.30106886729 0 99.467501075 0.6224989249999965 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1174 5.02870270579 0 99.473000175 0.6169998250000021 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1175 2.01610051566 0 97.879997 2.715004499999992 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1176 2.50228643329 0 99.3804997 0.46425044999999443 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1177 2.50228643329 0 99.0319994 0.9870008999999911 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1178 3.38717868509 0 99.5639997 0.5260003000000012 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1179 2.50228643329 0 99.185500475 0.7567492874999999 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1180 3.85964385182 0 99.27000015 0.6299997749999946 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1181 6.30106886729 0 99.43400015 0.6559998499999949 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1182 2.01610051566 0 97.82 2.805000000000007 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1183 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1184 2.50228643329 0 99.3659993 0.4860010499999987 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1185 2.01610051566 0 98.440002 1.8749969999999863 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1186 2.01610051566 0 97.900002 2.6849969999999956 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1187 3.92040413524 0 99.31799965 0.55800052499999 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1188 3.92040413524 0 99.489999925 0.6000000749999913 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1189 2.50228643329 0 99.394499425 0.6955005749999913 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1190 3.92040413524 0 99.560999775 0.5290002249999987 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1191 6.30106886729 0 99.435999925 0.6540000749999934 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1192 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1193 2.01610051566 0 98.120003 2.354995500000001 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1194 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1195 6.30106886729 0 99.455999775 0.6340002250000026 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1196 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1197 3.85964385182 0 99.466000075 0.6239999250000011 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1198 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1199 2.01610051566 0 98.0 2.5349999999999966 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1200 6.30106886729 0 99.50349965 0.5865003500000029 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1201 3.38717868509 0 99.476000475 0.6139995249999913 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1202 2.50228643329 0 98.9125001 1.1662498499999927 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1203 3.92040413524 0 99.55849915 0.5315008499999948 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1204 2.50228643329 0 97.895000075 2.6924998874999986 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1205 2.50228643329 0 99.373498975 0.47475153749999066 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1206 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1207 5.02870270579 0 99.451001075 0.6389989250000042 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1208 2.50228643329 0 99.015499925 1.0117501124999961 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1209 2.50228643329 0 98.9950004 1.042499400000004 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1210 2.81322619695 0 99.5870001 0.5029999000000004 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1211 2.50228643329 0 99.468000825 0.6219991749999935 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1212 2.50228643329 0 99.44400025 0.6459997499999958 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1213 3.38717868509 0 99.581000225 0.5089997750000009 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1214 5.02870270579 0 99.519500225 0.5704997749999962 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1215 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1216 2.81322619695 0 99.5439997 0.5460002999999972 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1217 2.50228643329 0 99.472000875 0.6179991249999915 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1218 2.50228643329 0 99.36899875 0.4815018749999922 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1219 6.30106886729 0 99.263999775 0.6390003374999935 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1220 6.30106886729 0 99.45150035 0.6384996499999943 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1221 3.85964385182 0 99.2720009 0.6269986500000044 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1222 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1223 2.00016617632 0 99.660004 0.42999599999999705 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1224 2.47124761202 0 99.3569989 0.49950165000000624 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1225 2.47778695782 0 99.005000125 1.0274998125000039 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1226 3.85964385182 0 99.482000425 0.6079995750000023 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1227 2.50228643329 0 99.193499925 0.7447501125000002 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1228 5.02870270579 0 99.440000575 0.6499994249999986 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1229 3.85964385182 0 99.535499725 0.5545002750000038 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1230 3.7862916372 0 99.426000075 0.6639999249999932 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1231 6.14799414721 0 99.511999775 0.578000224999991 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1232 6.30106886729 0 99.434500575 0.6554994249999965 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1233 3.77195447337 0 99.6400006 0.4499994000000044 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1234 3.80166404425 0 99.526999625 0.5630003749999958 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1235 1.99590274244 0 99.580002 0.5099980000000045 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1236 2.47778695782 0 99.029999325 0.9900010124999881 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1237 5.02870270579 0 99.482000275 0.6079997249999934 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1238 2.47778695782 0 99.403499575 0.6865004249999999 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1239 6.10789096832 0 99.323999075 0.5490013874999917 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1240 2.01610051566 0 99.0 1.0349999999999966 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1241 2.77405457184 0 99.42650065 0.6634993500000036 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1242 2.50228643329 0 99.438000575 0.651999424999994 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1243 6.30106886729 0 99.243999875 0.6690001874999965 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1244 6.14799414721 0 99.4244999 0.665500099999997 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1245 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1246 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1247 2.50228643329 0 99.02999865 0.9900020250000026 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1248 3.34244261096 0 99.508000025 0.5819999749999966 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1249 2.47778695782 0 99.48050135 0.6094986500000005 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1250 2.44096937877 0 99.0244998 0.9982502999999951 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1251 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1252 2.47124761202 0 99.38599915 0.4560012749999913 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1253 6.30106886729 0 99.432500225 0.6574997749999995 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1254 3.33055390722 0 99.574000275 0.5159997249999947 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1255 5.02870270579 0 99.268000775 0.63299883749999 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1256 3.92040413524 0 99.543499525 0.5465004749999963 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1257 1.99590274244 0 99.440002 0.6499979999999909 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1258 6.14799414721 0 99.498500225 0.591499774999997 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1259 3.85964385182 0 99.48400015 0.6059998499999978 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1260 2.50228643329 0 99.2885006 0.6022490999999874 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1261 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1262 5.92620561097 0 99.547499375 0.5425006249999967 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1263 4.93072604433 0 99.42350015 0.6664998500000024 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1264 6.30106886729 0 99.5030001 0.5869999000000036 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1265 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1266 2.00016617632 0 99.379997 0.46500449999999205 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1267 1.99590274244 0 99.099998 0.8850029999999975 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1268 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1269 6.30106886729 0 99.46300005 0.6269999499999926 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1270 2.50228643329 0 99.42400025 0.6659997499999918 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1271 2.00016617632 0 99.68 0.4099999999999909 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1272 3.38717868509 0 99.5705005 0.5194995000000034 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1273 3.86059861244 0 99.622001025 0.46799897499999477 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1274 6.14799414721 0 99.553999775 0.5360002250000037 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1275 6.14799414721 0 99.5039994 0.5860005999999999 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1276 2.81322619695 0 99.59050025 0.4994997499999926 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1277 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1278 6.14799414721 0 99.45850055 0.6314994500000012 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1279 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1280 2.50228643329 0 98.999500425 1.035749362500006 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1281 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1282 2.47124761202 0 99.37799915 0.468001275000006 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1283 4.90489779833 0 99.584500425 0.5054995749999961 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1284 3.80166404425 0 99.423000075 0.6669999249999933 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1285 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1286 5.92620561097 0 99.540499425 0.5495005750000047 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1287 6.30106886729 0 99.47250045 0.6174995499999995 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1288 3.92040413524 0 99.551499425 0.5385005749999948 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1289 2.47124761202 0 98.8219998 1.302000299999996 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1290 4.90489779833 0 99.412499525 0.6775004749999965 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1291 2.78229733114 0 99.517998175 0.5720018249999953 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1292 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1293 2.47778695782 0 99.43750055 0.652499450000002 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1294 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1295 3.38717868509 0 99.474000925 0.6159990749999992 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1296 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1297 6.10789096832 0 99.406000225 0.6839997749999981 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1298 2.47778695782 0 98.236000025 2.180999962500003 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1299 2.50228643329 0 99.004499675 1.0282504874999887 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1300 2.81322619695 0 99.585500025 0.504499974999996 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1301 2.77405457184 0 99.4325001 0.6574998999999991 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1302 2.50228643329 0 98.298500325 2.0872495124999872 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1303 3.77195447337 0 99.63650085 0.4534991499999933 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1304 1.99590274244 0 99.440002 0.6499979999999909 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1305 2.47778695782 0 98.9780005 1.0679992500000068 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1306 2.47124761202 0 99.11650015 0.8602497750000069 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1307 6.30106886729 0 99.464500275 0.6254997249999917 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1308 3.7862916372 0 99.584499975 0.5055000249999978 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1309 3.80166404425 0 99.5214992 0.5685008000000039 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1310 2.50228643329 0 99.28400045 0.6089993250000063 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1311 6.14799414721 0 99.515500125 0.5744998749999951 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1312 3.85964385182 0 99.2660004 0.6359994000000029 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1313 2.47124761202 0 99.192499825 0.7462502624999985 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1314 2.47124761202 0 99.2194988 0.7057518000000016 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1315 6.14799414721 0 99.411000025 0.6789999749999908 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1316 6.30106886729 0 99.501499425 0.588500574999992 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1317 2.00016617632 0 99.199997 0.7350045000000023 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1318 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1319 2.50228643329 0 98.240000175 2.174999737499988 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1320 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1321 2.50228643329 0 99.466000925 0.6239990749999947 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1322 6.14799414721 0 99.503499825 0.586500174999992 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1323 2.47778695782 0 99.322 0.5519999999999925 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1324 2.50228643329 0 99.371498925 0.47775161250000053 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1325 2.47778695782 0 99.42149925 0.6685007500000012 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1326 6.30106886729 0 99.26800005 0.632999925 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1327 6.14799414721 0 99.492500075 0.5974999250000025 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1328 5.02870270579 0 99.4635003 0.6264996999999909 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1329 2.78229733114 0 99.5604995 0.5295004999999918 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1330 1.99590274244 0 98.440002 1.8749969999999863 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1331 2.50228643329 0 99.438000275 0.6519997250000046 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1332 3.92040413524 0 99.329498225 0.5407526625000045 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1333 2.50228643329 0 99.473000725 0.6169992749999921 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1334 1.97610564729 0 98.480003 1.814995500000002 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1335 6.30106886729 0 99.4390005 0.6509994999999918 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1336 6.14799414721 0 99.555500175 0.5344998249999918 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1337 4.78704248134 0 99.554999825 0.5350001750000019 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1338 2.47124761202 0 99.034499725 0.9832504124999915 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1339 2.47124761202 0 99.35549875 0.5017518750000036 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1340 2.50228643329 0 98.7550001 1.402499849999991 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1341 2.50228643329 0 98.317499175 2.0587512375000045 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1342 3.92040413524 0 99.476000275 0.6139997249999937 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1343 2.50228643329 0 99.396999175 0.6930008249999929 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1344 2.50228643329 0 99.280500325 0.6142495124999883 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1345 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1346 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1347 2.50228643329 0 98.8924994 1.1962508999999883 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1348 2.01610051566 0 98.440002 1.8749969999999863 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1349 2.50228643329 0 99.37049845 0.47925232499999737 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1350 2.50228643329 0 98.7390001 1.426499849999999 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1351 2.01610051566 0 98.18 2.2649999999999864 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1352 6.30106886729 0 99.46000045 0.6299995500000023 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1353 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1354 5.02870270579 0 99.27500045 0.6224993250000068 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1355 3.92040413524 0 99.321999375 0.5520009374999901 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1356 3.85964385182 0 99.478000325 0.6119996750000013 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1357 2.50228643329 0 99.18700025 0.7544996250000011 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1358 3.38717868509 0 99.580999825 0.5090001749999914 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1359 6.30106886729 0 99.258000725 0.6479989124999932 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1360 3.85964385182 0 99.273500675 0.6247489875000056 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1361 3.85964385182 0 99.528999325 0.5610006749999968 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1362 2.01610051566 0 97.900002 2.6849969999999956 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1363 2.50228643329 0 99.386499 0.4552514999999957 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1364 2.81322619695 0 99.58850025 0.5014997500000021 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1365 3.38717868509 0 99.471000175 0.6189998249999974 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1366 6.30106886729 0 99.44 0.65 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1367 3.92040413524 0 99.54600005 0.5439999499999942 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1368 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1369 2.81322619695 0 99.331999075 0.5370013874999984 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1370 2.01610051566 0 97.699997 2.9850045000000023 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1371 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1372 2.01610051566 0 99.0 1.0349999999999966 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1373 2.01610051566 0 97.620003 3.104995500000001 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1374 2.50228643329 0 98.237999725 2.1780004125000048 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1375 2.50228643329 0 99.369999825 0.48000026250000616 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1376 3.38717868509 0 99.5664995 0.5235004999999916 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1377 6.30106886729 0 99.4435005 0.6464994999999988 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1378 2.50228643329 0 98.989500525 1.0507492125000013 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1379 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1380 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1381 3.92040413524 0 99.566499725 0.5235002749999978 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1382 3.38717868509 0 99.542499525 0.5475004750000011 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1383 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1384 3.92040413524 0 99.560999575 0.529000425000001 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1385 6.30106886729 0 99.4365008 0.6534991999999932 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1386 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1387 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1388 6.30106886729 0 99.50550005 0.5844999500000029 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1389 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1390 2.00016617632 0 99.0 1.0349999999999966 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1391 2.50228643329 0 99.185499625 0.7567505624999882 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1392 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1393 6.30106886729 0 99.257000175 0.649499737499994 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1394 5.02870270579 0 99.2665001 0.6352498499999939 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1395 3.84474688915 0 99.428999625 0.6610003749999948 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1396 2.47124761202 0 99.218499125 0.7072513125000057 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1397 6.10789096832 0 99.3400001 0.5249998500000004 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1398 2.81322619695 0 99.581499675 0.5085003249999943 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1399 6.14799414721 0 99.5084996 0.5815004000000045 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1400 1.99590274244 0 99.459999 0.6300010000000015 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1401 2.47778695782 0 99.4290001 0.6609999000000016 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1402 2.50228643329 0 99.4385002 0.6514998000000048 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1403 2.50228643329 0 99.01299965 1.0155005250000002 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1404 1.97610564729 0 99.080002 0.9149970000000067 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1405 3.86059861244 0 99.59099995 0.4990000500000008 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1406 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1407 2.47124761202 0 99.321500075 0.5527498874999921 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1408 3.92040413524 0 99.55849965 0.5315003499999961 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1409 2.47778695782 0 99.33599825 0.5310026249999922 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1410 2.44096937877 0 99.022499775 1.0012503374999966 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1411 3.86059861244 0 99.6165006 0.4734994000000029 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1412 2.47778695782 0 99.3005006 0.5842490999999868 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1413 2.81322619695 0 99.5374991 0.5525008999999926 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1414 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1415 1.99590274244 0 99.580002 0.5099980000000045 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1416 5.02870270579 0 99.469500425 0.620499574999991 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1417 6.14799414721 0 99.461499925 0.6285000749999995 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1418 2.01610051566 0 99.0 1.0349999999999966 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1419 2.50228643329 0 99.413499975 0.6765000250000043 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1420 3.92040413524 0 99.541999425 0.5480005749999975 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1421 2.47778695782 0 99.026499675 0.9952504875000017 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1422 6.30106886729 0 99.4355002 0.6544997999999908 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1423 2.81322619695 0 99.46250045 0.6274995500000046 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1424 2.44096937877 0 99.44600045 0.6439995499999981 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1425 2.00016617632 0 99.339996 0.5250059999999976 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1426 2.00016617632 0 99.379997 0.46500449999999205 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1427 6.14799414721 0 99.55749925 0.5325007499999913 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1428 2.47124761202 0 99.362998825 0.49050176249998856 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1429 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1430 6.14799414721 0 99.418499925 0.6715000749999916 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1431 3.34244261096 0 99.6115011 0.4784988999999996 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1432 2.47778695782 0 99.425000225 0.6649997749999926 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1433 6.30106886729 0 99.464500175 0.625499825 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1434 6.14799414721 0 99.49900035 0.5909996499999949 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1435 2.50228643329 0 99.09000035 0.8999994750000013 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1436 2.47778695782 0 98.9905002 1.0492496999999972 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1437 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1438 6.30106886729 0 99.4660008 0.6239991999999944 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1439 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1440 6.10789096832 0 99.4094999 0.6805000999999976 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1441 3.92040413524 0 99.3284995 0.5422507499999867 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1442 6.30106886729 0 99.515500125 0.5744998749999951 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1443 2.78229733114 0 99.6075006 0.48249940000000324 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1444 2.47124761202 0 99.386499275 0.4552510874999882 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1445 4.93072604433 0 99.42349955 0.6665004499999952 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1446 3.38717868509 0 99.3259999 0.5460001499999976 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1447 3.38717868509 0 99.570499925 0.5195000750000048 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1448 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1449 6.10789096832 0 99.58400005 0.5059999499999975 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1450 3.80166404425 0 99.525999425 0.5640005750000029 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1451 3.7862916372 0 99.4165003 0.6734997000000021 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1452 1.99590274244 0 99.459999 0.6300010000000015 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1453 6.30106886729 0 99.502499825 0.5875001749999967 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1454 2.47778695782 0 99.323499175 0.5497512375000042 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1455 2.47778695782 0 99.047999525 0.9630007125000049 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1456 6.30106886729 0 99.425499675 0.6645003250000002 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1457 2.50228643329 0 98.892999075 1.1955013874999878 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1458 2.44096937877 0 99.398499325 0.6915006749999947 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1459 2.81322619695 0 99.590000925 0.4999990749999995 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1460 3.33055390722 0 99.73649975 0.35350025000000473 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1461 2.47778695782 0 99.26500055 0.6374991750000021 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1462 6.14799414721 0 99.4905 0.5995000000000005 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1463 3.92040413524 0 99.54349955 0.5465004499999907 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1464 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1465 2.50228643329 0 99.377499075 0.46875138749999223 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1466 2.00016617632 0 99.0 1.0349999999999966 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1467 2.00016617632 0 98.580002 1.6649970000000067 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1468 2.47124761202 0 99.267000075 0.6344998874999987 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1469 2.50228643329 0 99.283000625 0.6104990624999971 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1470 2.47778695782 0 99.420999375 0.6690006250000039 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1471 2.50228643329 0 99.3724994 0.4762509000000037 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1472 6.14799414721 0 99.413499925 0.6765000750000013 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1473 1.99590274244 0 98.139999 2.325001499999992 -1 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 35 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1474 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1475 6.30106886729 0 99.445500575 0.6444994250000008 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1476 6.10789096832 0 99.399999175 0.6900008249999928 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1477 2.50228643329 0 98.23200065 2.1869990249999915 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1478 3.34244261096 0 99.574999975 0.5150000250000005 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1479 2.47124761202 0 99.022999675 1.0005004875000054 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1480 2.78229733114 0 99.5164984 0.5735015999999945 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1481 2.77405457184 0 99.577998975 0.5120010249999979 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1482 5.02870270579 0 99.273000325 0.6254995124999994 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1483 6.30106886729 0 99.44949995 0.6405000499999943 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1484 2.50228643329 0 98.908000625 1.172999062499997 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1485 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1486 2.81322619695 0 99.587999675 0.5020003249999917 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1487 2.00016617632 0 99.660004 0.42999599999999705 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1488 3.85964385182 0 99.2625002 0.6412496999999888 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1489 1.99590274244 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1490 2.81322619695 0 99.331499075 0.537751387500002 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1491 3.86059861244 0 99.5910002 0.49899980000000144 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1492 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1493 2.47778695782 0 98.999500075 1.0357498874999962 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1494 3.34244261096 0 99.616000475 0.47399952499999076 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1495 3.85964385182 0 99.484999975 0.6050000250000039 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1496 3.92040413524 0 99.5539999 0.536000100000004 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1497 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1498 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1499 2.47778695782 0 99.0249994 0.9975008999999986 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1500 4.90489779833 0 99.41399985 0.6760001500000016 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1501 2.78229733114 0 99.554999075 0.535000925 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1502 5.92620561097 0 99.5504994 0.539500599999991 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1503 1.99590274244 0 99.260002 0.6449969999999965 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1504 2.47124761202 0 99.310500825 0.5692487624999885 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1505 2.81322619695 0 99.538999575 0.5510004249999924 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1506 2.00016617632 0 99.339996 0.5250059999999976 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1507 6.30106886729 0 99.467000075 0.6229999249999963 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1508 2.47778695782 0 99.39350025 0.6964997499999953 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1509 6.10789096832 0 99.579499625 0.5105003750000009 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1510 4.93072604433 0 99.5169999 0.5730000999999959 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1511 2.50228643329 0 99.3804993 0.46425105000000144 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1512 5.02870270579 0 99.475000625 0.6149993749999908 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1513 2.47778695782 0 98.93350015 1.134749774999996 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1514 6.14799414721 0 99.503000075 0.586999924999995 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1515 5.02870270579 0 99.458999975 0.6310000250000002 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1516 2.50228643329 0 99.0304991 0.9892513499999964 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1517 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1518 3.92040413524 0 99.554498975 0.5355010249999964 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1519 3.92040413524 0 99.566999875 0.5230001250000044 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1520 6.30106886729 0 99.474499975 0.6155000249999972 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1521 3.92040413524 0 99.3329993 0.5355010500000006 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1522 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1523 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1524 6.30106886729 0 99.502499275 0.5875007249999925 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1525 6.30106886729 0 99.4525002 0.6374997999999948 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1526 2.50228643329 0 98.3025 2.0812500000000043 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1527 6.30106886729 0 99.253999525 0.6540007125000002 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1528 2.01610051566 0 97.900002 2.6849969999999956 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1529 2.50228643329 0 99.3794995 0.4657507500000051 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1530 2.81322619695 0 99.3324994 0.5362508999999918 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1531 6.30106886729 0 99.4319996 0.6580004000000003 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1532 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1533 2.50228643329 0 99.4100001 0.6799998999999929 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1534 2.01610051566 0 97.879997 2.715004499999992 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1535 2.01610051566 0 97.699997 2.9850045000000023 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1536 5.02870270579 0 99.476500375 0.6134996249999972 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1537 2.01610051566 0 98.18 2.2649999999999864 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1538 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1539 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1540 2.81322619695 0 99.5905005 0.4994994999999932 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1541 2.50228643329 0 99.394999125 0.6950008749999995 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1542 2.50228643329 0 97.880498675 2.714251987499999 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1543 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1544 2.50228643329 0 99.3764997 0.47025045000000176 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1545 2.50228643329 0 99.371499825 0.4777502624999954 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1546 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1547 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1548 3.85964385182 0 99.26600075 0.6359988749999914 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1549 2.50228643329 0 97.659501225 3.045748162499997 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1550 2.01610051566 0 98.0 2.5349999999999966 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1551 2.50228643329 0 98.23250025 2.186249624999995 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1552 2.50228643329 0 99.00199985 1.0320002249999902 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1553 2.50228643329 0 99.37399865 0.4740020249999901 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1554 2.01610051566 0 99.0 1.0349999999999966 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1555 2.50228643329 0 99.102000425 0.8819993624999967 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1556 3.85964385182 0 99.4645003 0.6254997000000003 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1557 2.50228643329 0 99.281000925 0.6134986124999955 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1558 6.30106886729 0 99.4530003 0.6369996999999984 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1559 2.50228643329 0 99.187000725 0.7544989124999901 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1560 2.50228643329 0 98.898499075 1.187251387499991 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1561 2.50228643329 0 98.736500275 1.4302495875000005 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1562 3.38717868509 0 99.573999575 0.5160004249999958 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1563 5.02870270579 0 99.513499725 0.5765002749999951 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1564 2.47778695782 0 98.9680005 1.0829992499999932 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1565 3.33055390722 0 99.737999875 0.3520001249999979 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1566 2.47778695782 0 99.034999825 0.9825002624999968 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1567 2.50228643329 0 99.42649985 0.6635001499999987 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1568 2.78229733114 0 99.512998925 0.5770010749999926 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1569 2.78229733114 0 99.57799995 0.5120000499999918 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1570 4.78704248134 0 99.5660001 0.5239999000000012 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1571 3.85964385182 0 99.475500875 0.6144991250000033 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1572 2.50228643329 0 99.3974997 0.6925003000000004 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1573 6.10789096832 0 99.57150015 0.5184998499999921 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1574 2.81322619695 0 99.542499525 0.5475004750000011 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1575 2.50228643329 0 99.44250095 0.6474990500000019 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1576 6.14799414721 0 99.460000325 0.629999675000002 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1577 2.50228643329 0 99.00200005 1.0319999249999867 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1578 2.47778695782 0 99.050499675 0.9592504875000003 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1579 3.92040413524 0 99.55249935 0.5375006499999927 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1580 6.30106886729 0 99.454000825 0.6359991750000035 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1581 3.92040413524 0 99.5760004 0.5139995999999997 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1582 2.00016617632 0 99.199997 0.7350045000000023 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1583 6.14799414721 0 99.54849945 0.5415005500000035 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1584 2.47124761202 0 99.2154998 0.7117502999999914 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1585 2.81322619695 0 99.57649945 0.5135005499999977 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1586 2.73595882486 0 99.639001225 0.4509987749999965 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1587 2.47778695782 0 99.4405004 0.649499599999993 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1588 2.47124761202 0 99.503500175 0.5864998249999985 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1589 3.80166404425 0 99.47900055 0.6109994500000028 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1590 2.50228643329 0 99.404499675 0.685500325000001 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1591 2.47124761202 0 99.3180001 0.5579998499999874 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1592 3.71567552873 0 99.5499996 0.540000399999991 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1593 2.47124761202 0 99.273500675 0.6247489875000056 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1594 2.50228643329 0 99.026499175 0.9952512374999998 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1595 3.85964385182 0 99.467500875 0.6224991249999988 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1596 3.38717868509 0 99.567999575 0.522000424999996 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1597 3.34244261096 0 99.6130008 0.47699920000000307 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1598 3.80166404425 0 99.52149975 0.5685002499999939 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1599 6.10789096832 0 99.331499675 0.5377504874999914 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1600 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1601 5.02870270579 0 99.5239992 0.566000799999992 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1602 3.77195447337 0 99.628501325 0.46149867499999575 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1603 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1604 3.80166404425 0 99.5234991 0.5665009000000026 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1605 6.14799414721 0 99.500499575 0.5895004249999914 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1606 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1607 2.47124761202 0 99.386499275 0.4552510874999882 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1608 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1609 1.99590274244 0 99.599998 0.4900019999999984 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1610 2.50228643329 0 99.00950015 1.0207497750000059 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1611 2.47778695782 0 99.40549985 0.6845001499999995 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1612 3.85964385182 0 99.473500825 0.6164991749999956 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1613 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1614 3.86059861244 0 99.506499875 0.5835001249999948 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1615 2.47778695782 0 99.397499725 0.6925002749999948 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1616 5.92620561097 0 99.544999775 0.545000225000004 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1617 6.30106886729 0 99.473999525 0.6160004750000013 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1618 3.86059861244 0 99.5965 0.4934999999999917 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1619 6.14799414721 0 99.5010002 0.5889998000000048 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1620 5.02870270579 0 99.4765004 0.6134995999999916 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1621 3.38717868509 0 99.541999425 0.5480005749999975 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1622 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1623 2.78229733114 0 99.615500325 0.47449967499999846 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1624 6.30106886729 0 99.502500325 0.587499674999998 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1625 6.30106886729 0 99.43000005 0.6599999499999939 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1626 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1627 6.30106886729 0 99.26200065 0.6419990249999898 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1628 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1629 4.90489779833 0 99.586500025 0.5034999749999912 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1630 4.93072604433 0 99.510999575 0.5790004249999982 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1631 2.78229733114 0 99.614000375 0.4759996249999944 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1632 3.84474688915 0 99.43100005 0.6589999500000033 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1633 2.47778695782 0 99.2955008 0.5917487999999977 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1634 2.50228643329 0 99.184499825 0.7582502624999918 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1635 2.47778695782 0 99.329499525 0.5407507124999924 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1636 4.93072604433 0 99.52249995 0.567500050000001 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1637 3.33055390722 0 99.576000025 0.5139999749999987 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1638 3.38717868509 0 99.583000075 0.5069999249999967 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1639 3.34244261096 0 99.50749975 0.5825002500000039 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1640 3.92040413524 0 99.558499675 0.5315003250000047 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1641 3.86059861244 0 99.595000075 0.49499992499999623 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1642 2.47778695782 0 99.437500275 0.6524997249999928 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1643 2.50228643329 0 99.373499575 0.47475063750000146 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1644 1.99590274244 0 98.599998 1.6350029999999975 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1645 6.14799414721 0 99.50299975 0.587000249999997 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1646 2.47124761202 0 99.1220011 0.8519983499999881 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1647 2.81322619695 0 99.470500925 0.6194990750000017 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1648 2.47124761202 0 98.3239998 2.049000300000003 -1 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 35 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1649 2.00016617632 0 98.980003 1.064995500000002 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1650 2.47778695782 0 99.41799995 0.6720000500000026 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1651 3.71567552873 0 99.553999325 0.5360006749999912 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1652 2.01610051566 0 98.459999 1.8450015000000022 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1653 2.77405457184 0 99.738499275 0.3515007250000025 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1654 5.02870270579 0 99.255999975 0.6510000375000047 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1655 2.50228643329 0 98.902999675 1.180500487499991 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1656 6.30106886729 0 99.258499925 0.6472501125000036 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1657 3.27579123647 0 99.64500205 0.44499794999999553 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1658 2.47778695782 0 98.93350115 1.1347482749999998 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1659 2.81322619695 0 99.333498825 0.5347517624999867 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1660 6.30106886729 0 99.460500425 0.6294995749999913 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1661 2.50228643329 0 99.365498975 0.4867515375000053 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1662 5.02870270579 0 99.467001175 0.6229988250000048 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1663 3.85964385182 0 99.47449985 0.6155001499999969 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1664 3.92040413524 0 99.479499575 0.6105004249999922 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1665 4.78704248134 0 99.5624994 0.5275006000000048 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1666 3.85964385182 0 99.468500375 0.6214996249999928 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1667 6.30106886729 0 99.4335002 0.6564998000000003 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1668 3.80166404425 0 99.41949975 0.6705002499999978 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1669 2.81322619695 0 99.573000475 0.5169995249999971 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1670 2.47778695782 0 99.38749925 0.4537511249999895 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1671 2.01610051566 0 98.120003 2.354995500000001 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1672 3.85964385182 0 99.27349995 0.6247500749999944 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1673 2.50228643329 0 99.28100055 0.6134991749999941 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1674 6.14799414721 0 99.507500275 0.5824997249999996 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1675 6.10789096832 0 99.420499775 0.6695002250000016 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1676 4.90489779833 0 99.34499945 0.517500824999992 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1677 6.10789096832 0 99.3319992 0.5370011999999988 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1678 3.38717868509 0 99.46999995 0.6200000499999959 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1679 2.47124761202 0 98.53199905 1.7370014250000025 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1680 2.00016617632 0 99.239998 0.6750029999999967 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1681 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1682 2.78229733114 0 99.615001075 0.47499892500000274 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1683 2.47778695782 0 99.47850095 0.6114990500000005 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1684 6.30106886729 0 99.50049995 0.5895000499999924 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1685 2.50228643329 0 99.469001675 0.6209983249999965 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1686 2.00016617632 0 99.620003 0.4699970000000008 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1687 1.99590274244 0 98.440002 1.8749969999999863 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1688 6.14799414721 0 99.46750035 0.6224996500000032 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1689 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1690 4.93072604433 0 99.4275003 0.6624996999999923 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1691 6.14799414721 0 99.422500325 0.6674996749999963 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1692 3.80166404425 0 99.48150015 0.6084998499999955 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1693 6.30106886729 0 99.465000225 0.6249997750000006 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1694 2.78229733114 0 99.554499175 0.5355008249999941 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1695 2.81322619695 0 99.594500125 0.4954998750000016 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1696 2.50228643329 0 99.28250105 0.6112484250000065 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1697 1.99590274244 0 99.459999 0.6300010000000015 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1698 3.38717868509 0 99.541999225 0.5480007749999999 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1699 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1700 6.30106886729 0 99.451500025 0.6384999749999963 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1701 3.7862916372 0 99.32999935 0.5400009750000052 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1702 2.01610051566 0 98.839996 1.2750059999999976 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1703 2.47124761202 0 99.1944995 0.7432507499999872 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1704 2.00016617632 0 98.580002 1.6649970000000067 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1705 2.47124761202 0 98.273000575 2.1254991375000003 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1706 3.80166404425 0 99.573999925 0.5160000750000023 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1707 2.47124761202 0 98.8214997 1.3027504499999907 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1708 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1709 3.38717868509 0 99.33449965 0.5332505249999997 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1710 2.01610051566 0 98.440002 1.8749969999999863 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1711 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1712 2.50228643329 0 98.914500175 1.1632497374999957 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1713 2.01610051566 0 97.82 2.805000000000007 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1714 6.30106886729 0 99.4310002 0.658999799999998 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1715 2.01610051566 0 98.459999 1.8450015000000022 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1716 2.50228643329 0 99.474000575 0.6159994249999926 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1717 3.92040413524 0 99.478000175 0.6119998249999924 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1718 2.01610051566 0 97.699997 2.9850045000000023 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1719 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1720 5.02870270579 0 99.51599985 0.5740001499999977 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1721 6.30106886729 0 99.4455001 0.6444998999999939 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1722 2.50228643329 0 99.367498375 0.4837524375000015 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1723 2.01610051566 0 98.120003 2.354995500000001 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1724 2.01610051566 0 98.480003 1.814995500000002 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1725 2.50228643329 0 99.029999175 0.990001237499996 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1726 2.01610051566 0 98.18 2.2649999999999864 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1727 2.81322619695 0 99.4665007 0.6234993000000003 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1728 5.02870270579 0 99.4305001 0.6594998999999945 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1729 2.50228643329 0 99.09350035 0.8947494749999976 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1730 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1731 6.30106886729 0 99.2665001 0.6352498499999939 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1732 2.50228643329 0 98.74949935 1.4107509750000062 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1733 6.30106886729 0 99.467000275 0.622999724999994 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1734 3.38717868509 0 99.47200055 0.6179994499999936 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1735 2.50228643329 0 98.994999875 1.0425001874999893 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1736 2.50228643329 0 98.335499625 2.031750562500001 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1737 2.01610051566 0 98.0 2.5349999999999966 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1738 2.50228643329 0 99.402999975 0.6870000249999976 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1739 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1740 3.85964385182 0 99.43049945 0.6595005499999985 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1741 6.30106886729 0 99.4500003 0.6399996999999985 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1742 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1743 3.92040413524 0 99.53550005 0.5544999500000017 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1744 6.30106886729 0 99.503999575 0.5860004250000032 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf1745 3.38717868509 0 99.5845001 0.5054998999999981 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_promise_confs_batch220_single.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_promise_confs_batch220_single.txt index 7b364eda8f..822d30a531 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_promise_confs_batch220_single.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_promise_confs_batch220_single.txt @@ -8,5881 +8,5881 @@ conf1 1 0 99.69 0 ----- +++++ conf1 2.47778695782 0 99.4405011 0.6494988999999919 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf2 6.7963162944 0 99.247499625 0.6637505624999918 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf3 3.77195447337 0 99.475500875 0.6144991250000033 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf4 3.71656038268 0 99.55999965 0.5300003500000031 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf5 4.4071692756 0 99.52149975 0.5685002499999939 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf6 6.14799414721 0 99.5005001 0.5894999000000013 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf7 2.57685599488 0 99.380999175 0.4635012374999974 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf8 3.13161472572 0 99.57700015 0.5129998499999943 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf9 2.00016617632 0 99.68 0.4099999999999909 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf10 2.57685599488 0 99.097999925 0.8880001125000021 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf11 3.38717868509 0 99.550999225 0.5390007749999995 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf12 3.84474688915 0 99.72650005 0.36349994999999924 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf13 4.48527898013 0 99.467000825 0.6229991749999982 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf14 3.70186719231 0 99.72250035 0.3674996499999935 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf15 2.47778695782 0 98.99100065 1.048499024999991 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf16 4.62093815126 0 99.3364993 0.5302510499999968 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf17 3.09333654389 0 99.6080005 0.4819994999999949 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf18 2.55088214386 0 99.00499975 1.0275003750000025 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf19 3.97649090032 0 99.429000025 0.6609999750000043 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf20 5.92620561097 0 99.556000075 0.5339999249999977 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf21 2.01610051566 0 99.0 1.0349999999999966 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf22 5.22888975029 0 99.508999675 0.5810003249999994 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf23 3.13161472572 0 99.4715006 0.6184993999999989 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf24 5.98028404553 0 99.533499775 0.5565002250000021 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf25 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf26 3.84474688915 0 99.566499875 0.5235001249999925 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf27 6.7963162944 0 99.44900035 0.640999649999992 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf28 3.09333654389 0 99.5589995 0.531000499999999 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf29 3.77195447337 0 99.5525002 0.5374998000000005 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf30 5.33920664205 0 99.434000525 0.6559994749999959 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf31 6.61857279171 0 99.493500625 0.5964993750000019 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf32 2.55088214386 0 99.48100075 0.6089992499999909 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf33 6.14799414721 0 99.46299985 0.627000149999995 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf34 2.47778695782 0 99.255999775 0.6510003374999869 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf35 2.55088214386 0 99.405999975 0.6840000249999975 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf36 3.38717868509 0 99.47850045 0.6114995499999992 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf37 4.93072604433 0 99.52249925 0.5675007500000021 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf38 4.48527898013 0 99.4800001 0.6099998999999997 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf39 2.57685599488 0 99.172500625 0.7762490624999998 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf40 4.73066277039 0 99.453000075 0.6369999249999921 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf41 2.55088214386 0 99.25799985 0.6480002249999899 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf42 6.10789096832 0 99.576499825 0.5135001749999987 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf43 3.85964385182 0 99.4670006 0.622999399999992 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf44 2.44096937877 0 99.3809992 0.46350119999998896 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf45 2.57685599488 0 99.419500025 0.6704999749999928 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf46 1.99590274244 0 99.459999 0.6300010000000015 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf47 2.47778695782 0 99.38649995 0.45525007499999504 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf48 2.5439518228 0 99.1235002 0.8497497000000038 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf49 3.71656038268 0 99.572999425 0.5170005749999916 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf50 2.47778695782 0 99.4265002 0.6634997999999911 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf51 4.38652335485 0 99.335499625 0.531750562500001 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf52 2.47778695782 0 99.419999975 0.6700000250000017 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf53 2.50228643329 0 99.034499525 0.983250712499995 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf54 3.97649090032 0 99.570999775 0.5190002249999935 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf55 5.33920664205 0 99.457999875 0.632000124999999 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf56 3.95967525105 0 99.33649965 0.5302505250000067 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf57 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf58 2.47778695782 0 99.3020012 0.5819981999999868 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf59 5.92620561097 0 99.4655009 0.6244991000000027 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf60 4.64385542353 0 99.468000075 0.6219999249999916 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf61 2.57685599488 0 99.355498525 0.5017522124999942 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf62 4.73066277039 0 99.4225003 0.6674997000000019 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf63 1.99590274244 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf64 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf65 3.38717868509 0 99.333499175 0.5347512374999965 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf66 5.92620561097 0 99.476000125 0.613999874999999 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf67 2.81322619695 0 99.580499825 0.5095001749999938 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf68 2.78229733114 0 99.52199845 0.5680015499999996 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf69 3.34244261096 0 99.6135014 0.47649859999999367 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf70 3.97649090032 0 99.51250005 0.5774999499999979 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf71 6.61857279171 0 99.5040001 0.5859998999999988 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf72 6.61857279171 0 99.420500525 0.6694994750000035 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf73 6.61857279171 0 99.504999975 0.5850000249999937 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf74 3.63433700317 0 99.652501475 0.43749852500000375 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf75 6.10789096832 0 99.3409999 0.5235001499999967 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf76 4.4071692756 0 99.478000575 0.6119994250000019 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf77 2.50228643329 0 99.40099985 0.6890001499999926 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf78 3.77195447337 0 99.3255004 0.546749400000003 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf79 3.80166404425 0 99.565500025 0.524499974999992 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf80 6.57211871555 0 99.333000125 0.5354998124999995 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf81 2.50228643329 0 99.47350135 0.6164986499999913 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf82 5.79060658268 0 99.519000125 0.5709998749999926 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf83 2.50228643329 0 99.381999275 0.46200108749999913 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf84 2.47778695782 0 99.1835005 0.7597492500000058 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf85 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf86 2.55088214386 0 99.39199985 0.6980001499999929 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf87 3.85964385182 0 99.4795001 0.6104999000000021 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf88 4.38652335485 0 99.41799955 0.6720004499999931 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf89 4.93072604433 0 99.51499965 0.5750003500000048 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf90 3.13161472572 0 99.574500425 0.5154995750000012 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf91 3.09333654389 0 99.6145003 0.47549969999999464 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf92 3.92040413524 0 99.578000175 0.5119998249999981 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf93 5.33920664205 0 99.44499995 0.6450000500000016 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf94 2.47778695782 0 99.45300095 0.6369990499999943 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf95 2.50228643329 0 99.36299905 0.49050142499999794 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf96 5.02870270579 0 99.458500175 0.6314998250000002 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf97 5.75501684906 0 99.346999025 0.5145014624999931 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf98 2.57685599488 0 99.436500375 0.6534996250000035 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf99 3.33055390722 0 99.4209998 0.6690001999999936 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf100 5.59344058403 0 99.55649965 0.5335003499999914 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf101 5.79060658268 0 99.556999575 0.5330004249999917 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf102 3.7862916372 0 99.3334994 0.5347509000000059 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf103 6.30106886729 0 99.5175 0.5724999999999995 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf104 5.06758777035 0 99.552499325 0.5375006749999983 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf105 6.14799414721 0 99.499999525 0.5900004749999909 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf106 2.51187737029 0 99.4014999 0.6885000999999932 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf107 2.55088214386 0 99.39649945 0.6935005500000045 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf108 2.47124761202 0 99.1929994 0.745500899999989 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf109 4.03997047176 0 99.52149955 0.5685004499999963 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf110 3.70186719231 0 99.42099935 0.6690006499999953 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf111 2.47778695782 0 99.404999575 0.6850004249999927 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf112 3.97649090032 0 99.4600001 0.6299998999999957 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf113 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf114 3.86059861244 0 99.50650005 0.5834999499999981 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf115 2.57685599488 0 99.460000575 0.6299994250000026 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf116 5.33920664205 0 99.2685007 0.6322489499999904 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf117 5.02870270579 0 99.468000425 0.6219995749999981 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf118 4.4071692756 0 99.56599955 0.524000449999997 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf119 5.02870270579 0 99.4325 0.6574999999999932 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf120 6.61857279171 0 99.457500525 0.6324994749999974 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf121 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf122 2.50228643329 0 99.4199998 0.6700001999999984 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf123 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf124 6.16535217595 0 99.58050025 0.5094997499999977 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf125 6.30106886729 0 99.45700045 0.6329995500000024 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf126 2.57685599488 0 99.407999525 0.6820004750000038 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf127 3.77195447337 0 99.6320011 0.4579989000000012 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf128 6.57211871555 0 99.3999996 0.6900003999999967 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf129 6.36224047437 0 99.264500075 0.6382498874999953 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf130 4.48527898013 0 99.52299915 0.5670008499999938 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf131 5.79060658268 0 99.416500475 0.6734995249999912 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf132 2.5439518228 0 99.32149995 0.5527500749999916 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf133 2.55088214386 0 99.4905002 0.5994997999999981 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf134 3.95967525105 0 99.4149999 0.6750000999999998 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf135 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf136 3.80166404425 0 99.421499725 0.6685002749999939 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf137 3.7862916372 0 99.419999575 0.6700004249999921 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf138 3.77195447337 0 99.570499675 0.5195003250000042 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf139 3.86059861244 0 99.6145001 0.475499899999997 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf140 2.78229733114 0 99.609500425 0.4804995750000046 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf141 5.02870270579 0 99.446999675 0.643000324999997 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf142 3.77195447337 0 99.540499625 0.5495003750000024 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf143 4.90489779833 0 99.588500425 0.5014995749999912 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf144 5.75501684906 0 99.570999975 0.5190000249999912 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf145 2.5439518228 0 99.37649915 0.4702512749999954 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf146 6.16535217595 0 99.336499425 0.5302508624999973 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf147 5.33920664205 0 99.46600045 0.6239995500000021 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf148 2.47124761202 0 99.125500425 0.8467493624999989 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf149 4.03997047176 0 99.44900065 0.6409993499999956 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf150 4.48527898013 0 99.268500375 0.6322494374999934 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf151 3.08315119118 0 99.7369988 0.3530012000000028 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf152 2.55088214386 0 99.406000225 0.6839997749999981 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf153 6.36224047437 0 99.46550055 0.6244994499999962 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf154 4.73066277039 0 99.46849975 0.6215002499999912 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf155 3.38717868509 0 99.5730001 0.5169998999999962 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf156 6.10789096832 0 99.4119995 0.6780005000000046 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf157 3.08315119118 0 99.426000325 0.6639996749999938 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf158 4.93072604433 0 99.431000325 0.6589996749999983 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf159 5.79060658268 0 99.47600065 0.6139993499999946 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf160 6.36224047437 0 99.54099985 0.549000149999992 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf161 2.50228643329 0 99.3779992 0.4680011999999891 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf162 6.7963162944 0 99.4620004 0.627999600000004 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf163 2.57685599488 0 99.37699935 0.46950097499998833 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf164 6.20621598565 0 99.471000525 0.618999475000004 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf165 6.30106886729 0 99.439 0.6510000000000048 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf166 6.36224047437 0 99.440499925 0.6495000750000003 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf167 4.29202279061 0 99.5565002 0.5334997999999956 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf168 2.57685599488 0 98.9975002 1.0387496999999897 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf169 3.85964385182 0 99.483499875 0.606500124999991 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf170 2.50228643329 0 99.2815005 0.612749249999986 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf171 6.36224047437 0 99.457000375 0.6329996249999909 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf172 6.20621598565 0 99.55249975 0.5375002500000022 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf173 3.33055390722 0 99.583999875 0.5060001249999942 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf174 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf175 5.75501684906 0 99.41749995 0.6725000499999908 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf176 6.20621598565 0 99.503500175 0.5864998249999985 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf177 3.85964385182 0 99.4360005 0.653999499999992 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf178 2.47778695782 0 99.3329996 0.535500600000006 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf179 2.55088214386 0 99.409999625 0.6800003750000002 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf180 2.57685599488 0 99.010999225 1.0185011624999873 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf181 4.64385542353 0 99.567499925 0.5225000749999907 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf182 2.47778695782 0 99.4029989 0.6870010999999977 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf183 3.38717868509 0 99.57399985 0.5160001499999908 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf184 2.77405457184 0 99.4234996 0.6665003999999982 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf185 2.50228643329 0 99.379499125 0.4657513125000037 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf186 6.14799414721 0 99.4990005 0.5909995000000038 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf187 6.7963162944 0 99.43399975 0.6560002499999996 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf188 5.33920664205 0 99.51449955 0.5755004500000013 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf189 6.30106886729 0 99.266500125 0.6352498125000068 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf190 4.93072604433 0 99.56199985 0.5280001499999912 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf191 4.73066277039 0 99.51799975 0.5720002499999964 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf192 2.57685599488 0 99.285000425 0.6074993624999863 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf193 2.5439518228 0 99.1979999 0.7380001499999977 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf194 5.92620561097 0 99.44250075 0.6474992500000042 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf195 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf196 2.47778695782 0 99.0264997 0.9952504499999932 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf197 4.48527898013 0 99.449500675 0.6404993250000018 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf198 4.64385542353 0 99.5164996 0.5735003999999947 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf199 5.19985255986 0 99.3364995 0.5302507499999933 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf200 3.34244261096 0 99.608000925 0.48199907499999883 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf201 4.73066277039 0 99.265000525 0.6374992124999892 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf202 3.97649090032 0 99.51849965 0.5715003500000023 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf203 3.97649090032 0 99.52049955 0.569500450000001 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf204 5.22888975029 0 99.508000225 0.5819997749999942 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf205 4.90489779833 0 99.42149985 0.6685001499999942 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf206 2.5439518228 0 99.492999825 0.5970001749999995 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf207 4.03997047176 0 99.46600075 0.6239992499999915 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf208 2.81322619695 0 99.589000375 0.5009996250000001 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf209 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf210 5.22888975029 0 99.5105 0.5795000000000045 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf211 2.55088214386 0 99.42599955 0.6640004499999975 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf212 2.81322619695 0 99.542999675 0.5470003249999934 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf213 2.47778695782 0 99.05800035 0.947999474999996 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf214 2.55088214386 0 99.39599955 0.6940004499999987 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf215 4.93072604433 0 99.472500675 0.6174993249999915 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf216 5.02870270579 0 99.270500225 0.629249662499987 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf217 3.86059861244 0 99.590000025 0.4999999750000029 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf218 3.88250959671 0 99.55349975 0.5365002499999975 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf219 2.50228643329 0 99.365499425 0.48675086250000277 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf220 2.55088214386 0 98.99799995 1.0380000750000065 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf221 3.80166404425 0 99.53449915 0.5555008499999957 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf222 2.00016617632 0 99.239998 0.6750029999999967 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf223 2.00016617632 0 99.519997 0.5700029999999942 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf224 2.51187737029 0 99.024499825 0.9982502624999867 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf225 3.86059861244 0 99.57049935 0.519500649999992 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf226 2.47778695782 0 99.4985004 0.5914996000000002 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf227 3.71656038268 0 99.508999525 0.5810004750000047 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf228 3.71656038268 0 99.607000725 0.48299927499999173 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf229 2.47778695782 0 99.324499325 0.5482510124999891 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf230 6.14799414721 0 99.5609996 0.5290003999999954 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf231 3.77195447337 0 99.5870001 0.5029999000000004 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf232 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf233 3.92040413524 0 99.545499775 0.5445002250000016 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf234 3.92040413524 0 99.57699935 0.5130006500000036 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf235 2.55088214386 0 98.987999875 1.0530001874999968 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf236 2.57685599488 0 99.363999075 0.48900138750000366 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf237 4.48527898013 0 99.4369996 0.6530004000000048 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf238 5.79060658268 0 99.5149995 0.5750004999999959 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf239 2.81322619695 0 99.463000675 0.6269993249999942 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf240 4.64385542353 0 99.523000025 0.566999974999996 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf241 2.81322619695 0 99.33849865 0.5272520249999886 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf242 3.95967525105 0 99.6014996 0.48850040000000094 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf243 6.36224047437 0 99.4400002 0.6499997999999977 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf244 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf245 2.78229733114 0 99.57849985 0.5115001499999977 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf246 3.92040413524 0 99.554999725 0.5350002749999959 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf247 2.50228643329 0 98.990500375 1.049249437500002 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf248 2.57685599488 0 99.272500225 0.626249662499994 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf249 2.00016617632 0 99.339996 0.5250059999999976 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf250 2.57685599488 0 99.4725001 0.6174998999999929 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf251 2.55088214386 0 99.3259999 0.5460001499999976 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf252 2.73595882486 0 99.64500115 0.44499884999999895 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf253 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf254 6.14799414721 0 99.418500325 0.6714996750000012 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf255 3.92040413524 0 99.3274997 0.5437504499999903 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf256 5.92620561097 0 99.256500075 0.6502498874999887 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf257 2.47778695782 0 99.4835009 0.6064991000000021 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf258 2.57685599488 0 99.441500975 0.648499025000001 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf259 3.86059861244 0 99.615001125 0.4749988749999915 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf260 2.57685599488 0 99.02799945 0.9930008250000029 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf261 3.85964385182 0 99.522499925 0.5675000749999924 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf262 6.61857279171 0 99.545499925 0.5445000749999963 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf263 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf264 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf265 4.90489779833 0 99.343498975 0.5197515374999924 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf266 2.55088214386 0 99.449500125 0.6404998749999976 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf267 3.38717868509 0 99.56699955 0.5230004499999922 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf268 6.16535217595 0 99.40699995 0.6830000499999983 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf269 3.13161472572 0 99.577999975 0.5120000250000004 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf270 5.02870270579 0 99.5194997 0.5705003000000005 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf271 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf272 4.03997047176 0 99.270000125 0.629999812500003 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf273 2.50228643329 0 99.436000225 0.653999774999997 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf274 6.30106886729 0 99.4565001 0.6334998999999982 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf275 4.64385542353 0 99.520499675 0.5695003250000014 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf276 6.20621598565 0 99.51049995 0.5795000500000015 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf277 5.22888975029 0 99.561499175 0.5285008250000033 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf278 2.50228643329 0 99.100999975 0.8835000375000064 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf279 6.20621598565 0 99.50749955 0.5825004499999921 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf280 2.50228643329 0 99.408499875 0.6815001249999938 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf281 2.44096937877 0 99.442000325 0.6479996750000027 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf282 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf283 4.78704248134 0 99.559999825 0.5300001749999922 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf284 6.7963162944 0 99.50250015 0.5874998499999947 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf285 4.93072604433 0 99.514499925 0.5755000750000022 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf286 2.50228643329 0 99.4350005 0.6549994999999967 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf287 4.51618813067 0 99.55749945 0.5325005500000032 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf288 5.22888975029 0 99.41699995 0.6730000499999932 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf289 4.4071692756 0 99.51849985 0.57150015 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf290 2.5439518228 0 99.19599945 0.7410008249999933 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf291 3.85964385182 0 99.27150005 0.6277499249999963 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf292 3.86059861244 0 99.5890004 0.5009995999999944 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf293 2.57685599488 0 99.377499425 0.4687508625000021 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf294 3.71656038268 0 99.59550035 0.49449965000000307 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf295 2.57685599488 0 99.360999575 0.4935006375000057 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf296 5.19985255986 0 99.40499985 0.6850001500000019 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf297 4.03997047176 0 99.44049995 0.6495000499999947 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf298 2.5439518228 0 99.282001325 0.6119980125000026 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf299 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf300 2.47778695782 0 98.968000625 1.0829990624999937 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf301 3.71656038268 0 99.624000875 0.46599912500000473 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf302 3.70186719231 0 99.567000175 0.5229998249999938 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf303 5.79060658268 0 99.507000675 0.5829993249999973 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf304 4.03997047176 0 99.4725003 0.6174997000000048 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf305 6.20621598565 0 99.42449955 0.6655004500000047 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf306 2.55088214386 0 99.171000825 0.778498762500007 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf307 3.71567552873 0 99.566999925 0.5230000749999931 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf308 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf309 2.51187737029 0 99.449501 0.6404989999999998 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf310 2.47778695782 0 99.409999625 0.6800003750000002 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf311 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf312 3.92040413524 0 99.4805003 0.609499699999995 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf313 3.7862916372 0 99.593500375 0.4964996249999928 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf314 2.5439518228 0 99.2214994 0.7027508999999981 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf315 2.47778695782 0 98.999999825 1.0350002624999917 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf316 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf317 3.13161472572 0 99.53799935 0.5520006499999909 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf318 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf319 2.5439518228 0 99.353998375 0.5040024374999916 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf320 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf321 5.92620561097 0 99.510499825 0.5795001750000012 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf322 6.7963162944 0 99.430000375 0.6599996249999919 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf323 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf324 2.50228643329 0 99.288500125 0.6022498124999984 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf325 3.13161472572 0 99.335998525 0.531002212500006 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf326 6.36224047437 0 99.513500075 0.5764999250000017 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf327 2.55088214386 0 99.386498775 0.4552518374999863 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf328 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf329 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf330 6.57211871555 0 99.56999995 0.5200000500000016 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf331 5.92620561097 0 99.4315001 0.6584999000000039 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf332 6.30106886729 0 99.432499925 0.6575000749999959 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf333 2.55088214386 0 99.500499575 0.5895004249999914 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf334 6.57211871555 0 99.3380004 0.5279993999999988 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf335 6.30106886729 0 99.4234996 0.6665003999999982 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf336 2.01610051566 0 98.480003 1.814995500000002 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf337 2.47778695782 0 99.0089999 1.0215001499999872 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf338 4.73066277039 0 99.511499475 0.578500525000004 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf339 5.92620561097 0 99.42850015 0.6614998499999928 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf340 6.10789096832 0 99.337999325 0.528001012499999 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf341 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf342 3.77195447337 0 99.642001075 0.4479989250000017 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf343 5.98028404553 0 99.551499675 0.5385003249999954 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf344 2.47778695782 0 99.443499975 0.6465000250000031 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf345 4.62093815126 0 99.58099945 0.5090005500000047 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf346 3.80166404425 0 99.565000275 0.524999724999995 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf347 2.50228643329 0 99.4395002 0.6504998000000001 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf348 2.00016617632 0 98.419998 1.9050029999999865 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf349 2.5439518228 0 99.21349975 0.7147503750000013 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf350 6.57211871555 0 99.5695004 0.5204996000000023 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf351 2.47778695782 0 99.02399955 0.9990006749999978 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf352 2.81322619695 0 99.337498225 0.5287526624999899 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf353 4.48527898013 0 99.4734996 0.6165004000000011 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf354 2.78229733114 0 99.551499625 0.5385003749999925 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf355 4.93072604433 0 99.473499625 0.6165003749999954 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf356 3.84474688915 0 99.427999575 0.6620004249999966 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf357 3.95967525105 0 99.333499625 0.534750562499994 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf358 3.80166404425 0 99.4210003 0.6689996999999949 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf359 6.30106886729 0 99.4224998 0.6675002000000007 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf360 3.7862916372 0 99.329999175 0.5400012375000003 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf361 2.81322619695 0 99.46900045 0.620999550000002 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf362 4.38652335485 0 99.335999375 0.5310009374999964 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf363 2.55088214386 0 99.264999825 0.6375002624999908 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf364 6.61857279171 0 99.55200015 0.53799985 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf365 5.79060658268 0 99.471500275 0.6184997250000009 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf366 2.5439518228 0 99.48699985 0.6030001499999941 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf367 3.77195447337 0 99.55600025 0.533999750000001 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf368 6.36224047437 0 99.253000075 0.6554998874999924 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf369 5.02870270579 0 99.4630002 0.6269998000000016 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf370 6.7963162944 0 99.447499725 0.6425002749999976 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf371 6.10789096832 0 99.574500175 0.5154998250000006 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf372 2.50228643329 0 99.382000175 0.461999737499994 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf373 3.34244261096 0 99.613501175 0.4764988250000016 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf374 6.14799414721 0 99.4234998 0.6665001999999959 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf375 2.5439518228 0 98.316499775 2.060250337500001 -1 gpu conv samp 35 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 35 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf376 3.92040413524 0 99.54199925 0.5480007499999943 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf377 5.22888975029 0 99.508000375 0.5819996250000031 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf378 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf379 4.03997047176 0 99.4290005 0.660999499999997 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf380 3.77195447337 0 99.579999825 0.5100001749999962 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf381 3.85964385182 0 99.46100045 0.6289995499999975 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf382 4.4071692756 0 99.519999425 0.5700005750000031 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf383 3.63433700317 0 99.644001375 0.44599862499999576 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf384 6.61857279171 0 99.49999985 0.590000150000003 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf385 2.5439518228 0 99.35599865 0.5010020249999911 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf386 6.61857279171 0 99.503999925 0.5860000749999955 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf387 6.16535217595 0 99.412499275 0.6775007249999959 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf388 3.97649090032 0 99.555499475 0.5345005249999929 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf389 5.33920664205 0 99.46450045 0.625499549999995 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf390 2.47124761202 0 99.383499 0.45975149999999587 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf391 3.77195447337 0 99.335499475 0.5317507874999876 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf392 6.61857279171 0 99.460500025 0.629499974999996 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf393 6.36224047437 0 99.441499775 0.6485002250000008 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf394 5.59344058403 0 99.55299965 0.5370003499999939 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf395 6.36224047437 0 99.5040001 0.5859998999999988 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf396 3.08315119118 0 99.56949995 0.520500050000004 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf397 4.93072604433 0 99.556999875 0.5330001249999953 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf398 2.50228643329 0 99.284500025 0.6082499624999969 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf399 2.47124761202 0 99.31950005 0.5557499249999935 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf400 6.10789096832 0 99.412999875 0.6770001250000007 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf401 2.5439518228 0 99.284500375 0.6082494375000067 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf402 2.55088214386 0 98.27499985 2.122500224999996 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf403 3.34244261096 0 99.51049965 0.5795003499999979 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf404 5.22888975029 0 99.512500025 0.5774999750000035 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf405 5.92620561097 0 99.272500625 0.626249062499987 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf406 2.55088214386 0 99.416999825 0.6730001749999929 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf407 2.50228643329 0 98.888500375 1.2022494374999866 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf408 3.97649090032 0 99.417999375 0.672000625000004 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf409 3.85964385182 0 99.437500625 0.6524993749999993 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf410 2.50228643329 0 99.46800095 0.6219990499999938 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf411 4.93072604433 0 99.516499975 0.5735000249999956 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf412 4.64385542353 0 99.558999325 0.5310006749999957 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf413 2.50228643329 0 99.366999125 0.4845013124999866 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf414 4.48527898013 0 99.27900015 0.6164997749999941 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf415 5.75501684906 0 99.582499775 0.5075002249999955 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf416 6.14799414721 0 99.503999875 0.5860001249999925 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf417 4.73066277039 0 99.42850035 0.6614996500000047 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf418 5.79060658268 0 99.5514997 0.538500300000004 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf419 2.50228643329 0 98.90149975 1.1827503749999977 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf420 1.99590274244 0 99.440002 0.6499979999999909 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf421 2.5439518228 0 99.380999675 0.4635004874999993 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf422 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf423 2.55088214386 0 99.040499575 0.9742506374999991 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf424 2.5439518228 0 99.119001075 0.8564983874999967 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf425 6.30106886729 0 99.4704999 0.6195001000000048 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf426 3.97649090032 0 99.463000875 0.6269991249999919 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf427 5.02870270579 0 99.279499875 0.615750187499998 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf428 3.86059861244 0 99.55900015 0.5309998499999949 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf429 2.55088214386 0 99.40249965 0.687500350000002 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf430 2.47778695782 0 99.033499525 0.9847507125000021 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf431 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf432 2.81322619695 0 99.5774999 0.5125000999999912 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf433 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf434 3.38717868509 0 99.569500275 0.520499725000002 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf435 2.55088214386 0 98.984499925 1.058250112500005 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf436 6.57211871555 0 99.41049985 0.6795001500000041 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf437 2.57685599488 0 98.3014998 2.0827502999999936 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf438 3.92040413524 0 99.48750085 0.6024991499999942 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf439 4.73066277039 0 99.268000275 0.6329995874999881 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf440 4.90489779833 0 99.42150015 0.6684998499999978 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf441 2.5439518228 0 99.314000025 0.5639999624999987 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf442 2.51187737029 0 99.0054997 1.0267504499999944 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf443 6.14799414721 0 99.55700005 0.5329999499999986 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf444 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf445 2.57685599488 0 99.2825003 0.6112495500000037 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf446 2.47124761202 0 98.8115005 1.3177492500000056 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf447 2.78229733114 0 99.60900025 0.4809997500000037 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf448 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf449 2.57685599488 0 99.0224999 1.001250149999997 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf450 3.97649090032 0 99.51849985 0.57150015 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf451 3.80166404425 0 99.481500675 0.6084993249999912 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf452 2.47778695782 0 99.476501075 0.6134989249999961 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf453 2.57685599488 0 99.40400015 0.6859998499999961 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf454 6.16535217595 0 99.57800005 0.5119999499999978 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf455 6.7963162944 0 99.43149955 0.6585004499999997 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf456 6.16535217595 0 99.3334992 0.5347511999999881 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf457 2.57685599488 0 98.3189993 2.0565010499999943 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf458 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf459 2.51187737029 0 99.4385002 0.6514998000000048 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf460 4.48527898013 0 99.527499525 0.5625004750000017 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf461 6.20621598565 0 99.499999475 0.5900005250000021 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf462 2.5439518228 0 98.809000625 1.3214990625000027 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf463 4.90489779833 0 99.3339999 0.5340001500000042 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf464 3.84474688915 0 99.57100005 0.5189999500000028 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf465 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf466 6.7963162944 0 99.2584997 0.6472504499999943 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf467 6.61857279171 0 99.43000025 0.6599997499999916 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf468 5.22888975029 0 99.51450005 0.5754999500000025 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf469 2.57685599488 0 99.277500725 0.6187489125000027 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf470 2.55088214386 0 98.95650025 1.1002496249999894 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf471 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf472 5.02870270579 0 99.47649985 0.6135001500000016 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf473 6.20621598565 0 99.456000575 0.6339994249999933 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf474 5.79060658268 0 99.4119998 0.678000199999994 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf475 6.20621598565 0 99.4235003 0.6664996999999971 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf476 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf477 2.50228643329 0 99.391498725 0.6985012749999925 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf478 2.55088214386 0 99.321500125 0.5527498124999966 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf479 3.92040413524 0 99.574499775 0.515500224999991 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf480 6.7963162944 0 99.50149975 0.5885002500000042 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf481 2.78229733114 0 99.61500085 0.4749991499999965 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf482 4.64385542353 0 99.51899995 0.5710000500000035 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf483 3.86059861244 0 99.604000675 0.4859993250000031 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf484 5.19985255986 0 99.580500375 0.509499624999998 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf485 2.47124761202 0 99.2139993 0.7140010500000002 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf486 2.50228643329 0 98.319499875 2.0557501874999886 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf487 2.57685599488 0 99.026999275 0.9945010875000051 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf488 3.85964385182 0 99.267500325 0.6337495124999961 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf489 1.99590274244 0 98.940002 1.1249969999999863 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf490 2.57685599488 0 99.3614989 0.4927516499999953 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf491 2.47778695782 0 98.26250005 2.141249924999997 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf492 3.95967525105 0 99.5879999 0.502000099999998 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf493 3.92040413524 0 99.561499525 0.5285004749999956 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf494 4.93072604433 0 99.5169992 0.573000799999997 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf495 2.57685599488 0 98.72399965 1.4490005250000024 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf496 4.03997047176 0 99.26750035 0.6337494749999877 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf497 2.00016617632 0 99.379997 0.46500449999999205 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf498 2.5439518228 0 99.20499985 0.7275002250000071 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf499 3.84474688915 0 99.72549965 0.3645003499999945 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf500 4.51618813067 0 99.5604993 0.5295006999999942 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf501 3.95967525105 0 99.411999625 0.6780003749999907 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf502 5.33920664205 0 99.450000925 0.6399990750000001 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf503 3.92040413524 0 99.562499325 0.5275006749999932 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf504 2.55088214386 0 99.474500675 0.6154993249999962 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf505 6.30106886729 0 99.268999325 0.6315010125000029 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf506 3.86059861244 0 99.612500925 0.47749907499999156 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf507 3.86059861244 0 99.50949945 0.5805005499999908 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf508 5.33920664205 0 99.26700055 0.6344991749999878 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf509 2.55088214386 0 98.9395005 1.1257492500000055 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf510 2.57685599488 0 99.3634994 0.4897509000000042 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf511 2.47778695782 0 98.987000525 1.054499212499998 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf512 6.30106886729 0 99.5064999 0.5835001000000034 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf513 4.03997047176 0 99.463000375 0.6269996250000048 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf514 6.14799414721 0 99.493500275 0.5964997249999954 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf515 6.20621598565 0 99.51799935 0.5720006500000011 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf516 2.50228643329 0 98.747499825 1.4137502624999883 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf517 5.06758777035 0 99.539499625 0.5505003749999929 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf518 5.33920664205 0 99.474000775 0.6159992250000045 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf519 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf520 2.47778695782 0 99.42150005 0.6684999499999918 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf521 3.38717868509 0 99.3224999 0.5512501500000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf522 4.62093815126 0 99.338999375 0.5265009374999963 -1 gpu conv samp 34 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf523 2.57685599488 0 99.097500525 0.8887492124999952 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf524 2.50228643329 0 99.008999525 1.021500712500007 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf525 2.47778695782 0 99.1829998 0.7605002999999897 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf526 5.92620561097 0 99.543499475 0.5465005249999934 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf527 6.61857279171 0 99.49699995 0.5930000499999949 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf528 3.80166404425 0 99.515499425 0.5745005749999962 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf529 2.47778695782 0 99.042499675 0.9712504874999937 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf530 2.50228643329 0 99.37299885 0.47550172499999377 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf531 4.64385542353 0 99.424999925 0.6650000750000032 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf532 2.50228643329 0 99.372999525 0.4755007125000006 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf533 1.99590274244 0 98.599998 1.6350029999999975 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf534 5.02870270579 0 99.449499375 0.6405006249999957 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf535 3.88250959671 0 99.554000025 0.5359999750000043 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf536 3.08315119118 0 99.72599995 0.3640000499999957 -1 gpu conv samp 33 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf537 4.78704248134 0 99.564499975 0.5255000249999938 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf538 2.50228643329 0 99.092500375 0.8962494374999963 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf539 6.36224047437 0 99.5414994 0.5485005999999913 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf540 2.55088214386 0 99.173000725 0.7754989125000051 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf541 2.55088214386 0 98.207499625 2.223750562500001 -1 gpu conv perf 23 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf542 2.5439518228 0 99.0289995 0.9915007500000002 -1 gpu conv samp 36 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf543 3.85964385182 0 99.462500375 0.627499624999993 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf544 5.79060658268 0 99.508000025 0.5819999749999966 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf545 4.93072604433 0 99.42049995 0.6695000499999907 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf546 4.29202279061 0 99.5639995 0.5260005000000035 -1 gpu conv fp16 1 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf547 2.55088214386 0 99.298499575 0.5872506375000057 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf548 3.86059861244 0 99.570500025 0.5194999749999966 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf549 6.7963162944 0 99.459000275 0.6309997250000038 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf550 6.20621598565 0 99.5074999 0.5825000999999986 -1 gpu conv perf 25 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf551 6.14799414721 0 99.508500275 0.5814997249999948 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf552 6.7963162944 0 99.42750015 0.6624998499999976 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf553 6.20621598565 0 99.5584997 0.531500299999999 -1 gpu conv perf 29 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf554 2.55088214386 0 99.388499675 0.4522504874999882 -1 gpu conv perf 24 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf555 3.92040413524 0 99.324499525 0.5482507125000069 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf556 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf557 3.86059861244 0 99.583500325 0.5064996749999949 -1 gpu conv perf 30 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf558 2.78229733114 0 99.51749825 0.572501749999995 -1 gpu conv perf 28 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf559 4.48527898013 0 99.4650002 0.624999799999992 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf560 3.13161472572 0 99.329499575 0.5407506374999969 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf561 2.57685599488 0 99.369498875 0.4807516874999891 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf562 2.81322619695 0 99.597000425 0.49299957499999325 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf563 5.33920664205 0 99.4605002 0.6294997999999993 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf564 6.7963162944 0 99.454500075 0.6354999249999992 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf565 4.73066277039 0 99.520499175 0.5695008250000001 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf566 2.50228643329 0 97.89149975 2.6977503750000054 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf567 2.50228643329 0 99.444000275 0.6459997250000044 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf568 2.57685599488 0 98.99899945 1.0365008249999974 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf569 2.01610051566 0 99.0 1.0349999999999966 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf570 3.92040413524 0 99.573499575 0.5165004249999982 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf571 6.36224047437 0 99.441499825 0.6485001750000038 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf572 3.77195447337 0 99.31999955 0.5550006749999881 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf573 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf574 2.01610051566 0 98.0 2.5349999999999966 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf575 2.50228643329 0 99.37149915 0.4777512749999886 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf576 2.50228643329 0 99.2860005 0.6059992499999964 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf577 2.50228643329 0 99.44100045 0.6489995499999935 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf578 2.81322619695 0 99.469000975 0.6209990249999976 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf579 6.36224047437 0 99.4460002 0.6439997999999975 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf580 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf581 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf582 3.38717868509 0 99.5824997 0.5075002999999981 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf583 3.85964385182 0 99.47850095 0.6114990500000005 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf584 5.33920664205 0 99.473500525 0.616499474999992 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf585 2.57685599488 0 99.4610008 0.6289992000000041 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf586 2.50228643329 0 97.649001025 3.0614984624999906 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf587 2.50228643329 0 98.9994999 1.0357501499999913 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf588 3.38717868509 0 99.585500725 0.5044992749999949 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf589 2.01610051566 0 97.760002 2.8949969999999965 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf590 2.01610051566 0 97.900002 2.6849969999999956 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf591 2.57685599488 0 99.0359988 0.9810017999999943 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf592 2.57685599488 0 99.439000625 0.6509993749999922 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf593 2.01610051566 0 97.620003 3.104995500000001 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf594 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf595 2.50228643329 0 98.7414999 1.422750150000006 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf596 2.01610051566 0 97.82 2.805000000000007 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf597 5.33920664205 0 99.43200015 0.6579998500000045 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf598 5.92620561097 0 99.445 0.6450000000000046 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf599 5.02870270579 0 99.4440004 0.6459996000000047 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf600 6.36224047437 0 99.470000675 0.6199993250000034 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf601 2.50228643329 0 99.41950015 0.6704998499999931 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf602 4.73066277039 0 99.446999975 0.6430000250000006 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf603 4.48527898013 0 99.4805003 0.609499699999995 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf604 3.13161472572 0 99.57349995 0.5165000499999991 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf605 6.30106886729 0 99.4230005 0.6669994999999972 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf606 2.57685599488 0 99.472000525 0.6179994749999992 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf607 4.73066277039 0 99.472 0.6180000000000035 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf608 5.02870270579 0 99.520999475 0.5690005250000013 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf609 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf610 4.73066277039 0 99.26550005 0.6367499249999966 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf611 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf612 3.92040413524 0 99.5585006 0.5314993999999956 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf613 5.33920664205 0 99.257 0.6494999999999891 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf614 2.57685599488 0 99.019500225 1.0057496624999942 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf615 4.03997047176 0 99.473000425 0.6169995750000027 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf616 4.48527898013 0 99.4760001 0.6139999000000046 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf617 5.02870270579 0 99.470500125 0.6194998749999968 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf618 5.02870270579 0 99.469500375 0.6204996250000022 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf619 2.57685599488 0 99.3729995 0.4755007499999877 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf620 6.7963162944 0 99.26299995 0.6405000750000056 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf621 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf622 2.57685599488 0 99.4455 0.6445000000000022 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf623 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf624 2.57685599488 0 98.894499925 1.1932501124999888 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf625 6.30106886729 0 99.453500225 0.6364997749999987 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf626 2.81322619695 0 99.5449997 0.5450002999999924 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf627 2.50228643329 0 99.40899965 0.6810003499999994 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf628 6.30106886729 0 99.460500625 0.6294993750000032 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf629 2.57685599488 0 98.8919993 1.1970010500000043 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf630 3.92040413524 0 99.545499225 0.5445007749999974 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf631 5.92620561097 0 99.266500125 0.6352498125000068 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf632 3.38717868509 0 99.3304993 0.5392510499999972 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf633 2.50228643329 0 99.37649925 0.4702511250000043 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf634 2.50228643329 0 99.18549985 0.7567502249999976 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf635 2.50228643329 0 97.9579998 2.5980003000000025 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf636 2.01610051566 0 98.480003 1.814995500000002 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf637 2.50228643329 0 99.386499425 0.4552508625000016 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf638 2.01610051566 0 98.839996 1.2750059999999976 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf639 5.02870270579 0 99.437000225 0.6529997749999922 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf640 2.81322619695 0 99.578500325 0.5114996750000046 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf641 2.50228643329 0 98.3239996 2.0490006000000065 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf642 6.30106886729 0 99.498499875 0.5915001250000046 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf643 4.03997047176 0 99.4390005 0.6509994999999918 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf644 3.92040413524 0 99.316999875 0.5595001875000065 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf645 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf646 2.50228643329 0 99.27950065 0.6157490249999924 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf647 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf648 2.57685599488 0 97.878499025 2.717251462500002 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf649 6.36224047437 0 99.46100015 0.6289998499999939 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf650 2.01610051566 0 97.699997 2.9850045000000023 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf651 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf652 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf653 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf654 5.92620561097 0 99.4569999 0.6330000999999982 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf655 2.57685599488 0 99.266500325 0.6352495125000033 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf656 3.13161472572 0 99.579499825 0.5105001749999986 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf657 2.01610051566 0 98.18 2.2649999999999864 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf658 2.57685599488 0 98.2215007 2.202748949999986 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf659 2.81322619695 0 99.590000575 0.49999942499999295 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf660 2.57685599488 0 97.950000075 2.6099998874999883 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf661 2.57685599488 0 99.175999825 0.7710002624999888 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf662 2.57685599488 0 99.3644998 0.48825029999999003 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf663 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf664 2.57685599488 0 98.337999575 2.0280006375 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf665 2.57685599488 0 99.3804991 0.46425135000000495 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf666 6.7963162944 0 99.49299965 0.5970003499999962 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf667 5.92620561097 0 99.513500075 0.5764999250000017 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf668 4.48527898013 0 99.524999925 0.5650000749999947 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf669 3.85964385182 0 99.473000375 0.6169996249999997 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf670 4.03997047176 0 99.4490007 0.6409992999999986 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf671 3.85964385182 0 99.53549905 0.5545009499999992 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf672 4.03997047176 0 99.47100035 0.6189996500000007 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf673 3.85964385182 0 99.4680003 0.6219996999999978 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf674 3.77195447337 0 99.56099965 0.5290003499999983 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf675 4.73066277039 0 99.45050015 0.6394998500000014 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf676 3.38717868509 0 99.545499325 0.5445006750000033 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf677 3.13161472572 0 99.4750006 0.6149993999999964 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf678 2.57685599488 0 97.62400035 3.0989994749999923 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf679 2.50228643329 0 99.47150095 0.6184990499999913 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf680 2.50228643329 0 98.297999825 2.088000262499989 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf681 2.57685599488 0 99.417500325 0.6724996749999917 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf682 2.01610051566 0 98.440002 1.8749969999999863 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf683 4.48527898013 0 99.4639998 0.6260002000000014 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf684 2.50228643329 0 99.473501175 0.6164988250000022 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf685 2.50228643329 0 99.368 0.4830000000000041 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf686 2.57685599488 0 98.73399925 1.4340011250000018 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf687 3.85964385182 0 99.269000825 0.6314987624999873 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf688 5.33920664205 0 99.5154996 0.5745003999999995 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf689 3.38717868509 0 99.4760001 0.6139999000000046 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf690 6.30106886729 0 99.44300025 0.6469997500000005 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf691 4.73066277039 0 99.45850055 0.6314994500000012 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf692 3.77195447337 0 99.582999775 0.5070002249999931 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf693 2.50228643329 0 99.03049945 0.9892508250000063 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf694 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf695 2.57685599488 0 99.3689987 0.48150194999998774 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf696 6.7963162944 0 99.43299975 0.6570002500000044 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf697 3.38717868509 0 99.5629995 0.5270004999999941 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf698 4.48527898013 0 99.428500425 0.661499575000002 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf699 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf700 2.50228643329 0 98.90549945 1.1767508250000063 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf701 2.81322619695 0 99.334498375 0.5332524375000034 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf702 4.03997047176 0 99.526499175 0.5635008249999999 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf703 2.01610051566 0 98.540001 1.724998499999991 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf704 3.85964385182 0 99.437500375 0.6524996249999987 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf705 5.92620561097 0 99.46749985 0.6225001500000019 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf706 3.77195447337 0 99.54699895 0.5430010499999952 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf707 2.01610051566 0 98.120003 2.354995500000001 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf708 6.30106886729 0 99.265 0.6374999999999957 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf709 2.01610051566 0 97.879997 2.715004499999992 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf710 3.13161472572 0 99.591500275 0.4984997249999964 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf711 6.36224047437 0 99.26650015 0.6352497749999984 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf712 3.13161472572 0 99.543499225 0.5465007749999927 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf713 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf714 2.50228643329 0 99.097500225 0.8887496624999898 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf715 5.92620561097 0 99.471999375 0.618000625000002 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf716 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf717 2.50228643329 0 99.0050001 1.027499849999991 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf718 2.57685599488 0 98.3044998 2.0782502999999934 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf719 4.03997047176 0 99.2690006 0.6314990999999992 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf720 3.13161472572 0 99.339998875 0.5250016874999872 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf721 2.50228643329 0 98.887998975 1.2030015374999934 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf722 6.36224047437 0 99.50550005 0.5844999500000029 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf723 2.57685599488 0 98.899499525 1.1857507125000026 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf724 3.77195447337 0 99.58400025 0.5059997499999952 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 4 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf725 5.33920664205 0 99.4505002 0.6394998000000044 -1 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf726 2.57685599488 0 99.397999825 0.6920001749999983 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf727 4.48527898013 0 99.2640007 0.6389989500000013 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf728 5.02870270579 0 99.26750035 0.6337494749999877 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf729 6.7963162944 0 99.4540009 0.6359991000000008 -1 gpu conv perf 22 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf730 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf731 6.7963162944 0 99.429999925 0.6600000749999936 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf732 2.57685599488 0 99.281500125 0.6127498125000059 -1 gpu conv samp 31 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv samp 32 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf733 2.57685599488 0 99.0864994 0.9052509000000057 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 3 promise swing_level 6 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf734 3.92040413524 0 99.556999775 0.5330002250000035 -1 gpu conv perf 27 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- +++++ conf735 3.92040413524 0 99.47550035 0.6144996499999934 -1 gpu conv perf 26 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 2 promise swing_level 3 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 +5 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_confs_batch220.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_confs_batch220.txt index f41cf972dc..948efe5bd7 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_confs_batch220.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_confs_batch220.txt @@ -169,8 +169,8 @@ conf1 1.66592032533 0 82.900002 0.899996999999999 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf2 1.66766290747 0 82.860001 0.9599985000000046 @@ -256,8 +256,8 @@ conf2 1.66766290747 0 82.860001 0.9599985000000046 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf3 1.66670139642 0 82.900002 0.899996999999999 @@ -343,8 +343,8 @@ conf3 1.66670139642 0 82.900002 0.899996999999999 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf4 1.66748320027 0 82.940002 0.8399969999999897 @@ -430,8 +430,8 @@ conf4 1.66748320027 0 82.940002 0.8399969999999897 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf5 1.66826573791 0 82.940002 0.8399969999999897 @@ -517,8 +517,8 @@ conf5 1.66826573791 0 82.940002 0.8399969999999897 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf6 1.55813665736 0 82.900002 0.899996999999999 @@ -604,8 +604,8 @@ conf6 1.55813665736 0 82.900002 0.899996999999999 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf7 1.62488529847 0 82.979996 0.7800060000000002 @@ -691,8 +691,8 @@ conf7 1.62488529847 0 82.979996 0.7800060000000002 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf8 1.66748320027 0 83.080002 0.6299970000000101 @@ -778,8 +778,8 @@ conf8 1.66748320027 0 83.080002 0.6299970000000101 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf9 1.62414291979 0 82.979996 0.7800060000000002 @@ -865,8 +865,8 @@ conf9 1.62414291979 0 82.979996 0.7800060000000002 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf10 1.63389893891 0 83.12001 0.5699850000000097 @@ -952,8 +952,8 @@ conf10 1.63389893891 0 83.12001 0.5699850000000097 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf11 1.63332068534 0 83.119995 0.5700074999999956 @@ -1039,8 +1039,8 @@ conf11 1.63332068534 0 83.119995 0.5700074999999956 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf12 1.66826573791 0 83.080002 0.6299970000000101 @@ -1126,8 +1126,8 @@ conf12 1.66826573791 0 83.080002 0.6299970000000101 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf13 1.66345526231 0 83.019997 0.7200044999999946 @@ -1213,8 +1213,8 @@ conf13 1.66345526231 0 83.019997 0.7200044999999946 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf14 1.62414291979 0 82.860001 0.9599985000000046 @@ -1300,8 +1300,8 @@ conf14 1.62414291979 0 82.860001 0.9599985000000046 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf15 1.66867727615 0 83.219994 0.6800060000000002 @@ -1387,8 +1387,8 @@ conf15 1.66867727615 0 83.219994 0.6800060000000002 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf16 1.6939272698 0 82.360001 1.7099985000000046 @@ -1474,8 +1474,8 @@ conf16 1.6939272698 0 82.360001 1.7099985000000046 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf17 1.69125907336 0 82.979996 0.7800060000000002 @@ -1561,8 +1561,8 @@ conf17 1.69125907336 0 82.979996 0.7800060000000002 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf18 1.66267723003 0 82.880005 0.9299925000000044 @@ -1648,8 +1648,8 @@ conf18 1.66267723003 0 82.880005 0.9299925000000044 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf19 1.69414545349 0 82.259995 1.8600074999999947 @@ -1735,8 +1735,8 @@ conf19 1.69414545349 0 82.259995 1.8600074999999947 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf20 1.66846560269 0 83.039993 0.6900105000000067 @@ -1822,8 +1822,8 @@ conf20 1.66846560269 0 83.039993 0.6900105000000067 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf21 1.69333845447 0 82.360001 1.7099985000000046 @@ -1909,8 +1909,8 @@ conf21 1.69333845447 0 82.360001 1.7099985000000046 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf22 1.68774460395 0 82.239998 1.890003 @@ -1996,8 +1996,8 @@ conf22 1.68774460395 0 82.239998 1.890003 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf23 1.69063951413 0 82.899994 0.9000089999999901 @@ -2083,8 +2083,8 @@ conf23 1.69063951413 0 82.899994 0.9000089999999901 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf24 1.66867727615 0 83.659996 0.24000399999999333 @@ -2170,8 +2170,8 @@ conf24 1.66867727615 0 83.659996 0.24000399999999333 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf25 1.69454924151 0 82.519997 1.4700044999999946 @@ -2257,8 +2257,8 @@ conf25 1.69454924151 0 82.519997 1.4700044999999946 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf26 1.68718051806 0 83.020004 0.7199939999999998 @@ -2344,8 +2344,8 @@ conf26 1.68718051806 0 83.020004 0.7199939999999998 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf27 1.69495322205 0 82.379997 1.6800044999999955 @@ -2431,8 +2431,8 @@ conf27 1.69495322205 0 82.379997 1.6800044999999955 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf28 1.66267723003 0 83.100006 0.5999910000000099 @@ -2518,8 +2518,8 @@ conf28 1.66267723003 0 83.100006 0.5999910000000099 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf29 1.66867727615 0 83.119995 0.5700074999999956 @@ -2605,8 +2605,8 @@ conf29 1.66867727615 0 83.119995 0.5700074999999956 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf30 1.69374185785 0 82.239998 1.890003 @@ -2692,8 +2692,8 @@ conf30 1.69374185785 0 82.239998 1.890003 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf31 1.64669056053 0 82.880005 0.9299925000000044 @@ -2779,8 +2779,8 @@ conf31 1.64669056053 0 82.880005 0.9299925000000044 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf32 1.69166148649 0 82.800003 1.0499954999999943 @@ -2866,8 +2866,8 @@ conf32 1.69166148649 0 82.800003 1.0499954999999943 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf33 1.66867727615 0 83.219994 0.6800060000000002 @@ -2953,8 +2953,8 @@ conf33 1.66867727615 0 83.219994 0.6800060000000002 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf34 1.6939272698 0 82.240005 1.8899925000000053 @@ -3040,8 +3040,8 @@ conf34 1.6939272698 0 82.240005 1.8899925000000053 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf35 1.69290255882 0 82.259995 1.8600074999999947 @@ -3127,8 +3127,8 @@ conf35 1.69290255882 0 82.259995 1.8600074999999947 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf36 1.64980944683 0 82.32 1.7700000000000102 @@ -3214,8 +3214,8 @@ conf36 1.64980944683 0 82.32 1.7700000000000102 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf37 1.66750316607 0 83.379997 0.520002999999997 @@ -3301,8 +3301,8 @@ conf37 1.66750316607 0 83.379997 0.520002999999997 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf38 1.58532838785 0 82.259995 1.8600074999999947 @@ -3388,8 +3388,8 @@ conf38 1.58532838785 0 82.259995 1.8600074999999947 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf39 1.6384977922 0 83.060005 0.6599924999999942 @@ -3475,8 +3475,8 @@ conf39 1.6384977922 0 83.060005 0.6599924999999942 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf40 1.69312047859 0 82.219994 1.9200090000000003 @@ -3562,8 +3562,8 @@ conf40 1.69312047859 0 82.219994 1.9200090000000003 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf41 1.66867727615 0 82.720001 1.1699985000000055 @@ -3649,8 +3649,8 @@ conf41 1.66867727615 0 82.720001 1.1699985000000055 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf42 1.58568196474 0 82.280006 1.8299909999999997 @@ -3736,8 +3736,8 @@ conf42 1.58568196474 0 82.280006 1.8299909999999997 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf43 1.67188008661 0 83.060005 0.6599924999999942 @@ -3823,8 +3823,8 @@ conf43 1.67188008661 0 83.060005 0.6599924999999942 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf44 1.68426507974 0 82.300003 1.7999954999999943 @@ -3910,8 +3910,8 @@ conf44 1.68426507974 0 82.300003 1.7999954999999943 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf45 1.67470064441 0 81.800003 2.5499954999999943 @@ -3997,8 +3997,8 @@ conf45 1.67470064441 0 81.800003 2.5499954999999943 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf46 1.5655260791 0 82.060005 2.159992499999994 @@ -4084,8 +4084,8 @@ conf46 1.5655260791 0 82.060005 2.159992499999994 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf47 1.72373997515 0 82.240005 1.8899925000000053 @@ -4171,8 +4171,8 @@ conf47 1.72373997515 0 82.240005 1.8899925000000053 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf48 1.71832409894 0 82.400002 1.649996999999999 @@ -4258,8 +4258,8 @@ conf48 1.71832409894 0 82.400002 1.649996999999999 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf49 1.70069521045 0 83.080002 0.6299970000000101 @@ -4345,8 +4345,8 @@ conf49 1.70069521045 0 83.080002 0.6299970000000101 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf50 1.67391205968 0 82.18 1.9799999999999898 @@ -4432,8 +4432,8 @@ conf50 1.67391205968 0 82.18 1.9799999999999898 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf51 1.72248712959 0 81.920006 2.369990999999999 @@ -4519,8 +4519,8 @@ conf51 1.72248712959 0 81.920006 2.369990999999999 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf52 1.66577380924 0 82.18 1.9799999999999898 @@ -4606,8 +4606,8 @@ conf52 1.66577380924 0 82.18 1.9799999999999898 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf53 1.72415799543 0 81.539993 2.9400105000000067 @@ -4693,8 +4693,8 @@ conf53 1.72415799543 0 81.539993 2.9400105000000067 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf54 1.69663672592 0 82.559998 1.4100030000000103 @@ -4780,8 +4780,8 @@ conf54 1.69663672592 0 82.559998 1.4100030000000103 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf55 1.7199868941 0 82.320007 1.769989499999994 @@ -4867,8 +4867,8 @@ conf55 1.7199868941 0 82.320007 1.769989499999994 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf56 1.67391205968 0 81.800003 2.5499954999999943 @@ -4954,8 +4954,8 @@ conf56 1.67391205968 0 81.800003 2.5499954999999943 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf57 1.72415799543 0 82.240005 1.8899925000000053 @@ -5041,8 +5041,8 @@ conf57 1.72415799543 0 82.240005 1.8899925000000053 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf58 1.67369905614 0 82.440002 1.5899969999999897 @@ -5128,8 +5128,8 @@ conf58 1.67369905614 0 82.440002 1.5899969999999897 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf59 1.69988196097 0 82.459999 1.5600015000000056 @@ -5215,8 +5215,8 @@ conf59 1.69988196097 0 82.459999 1.5600015000000056 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf60 1.72332215751 0 82.540001 1.4399984999999944 @@ -5302,8 +5302,8 @@ conf60 1.72332215751 0 82.540001 1.4399984999999944 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf61 1.56656092908 0 82.440002 1.5899969999999897 @@ -5389,8 +5389,8 @@ conf61 1.56656092908 0 82.440002 1.5899969999999897 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf62 1.66577380924 0 82.819992 1.0200120000000013 @@ -5476,8 +5476,8 @@ conf62 1.66577380924 0 82.819992 1.0200120000000013 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf63 1.72415799543 0 81.599998 2.850003000000001 @@ -5563,8 +5563,8 @@ conf63 1.72415799543 0 81.599998 2.850003000000001 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf64 1.61121417569 0 81.899994 2.40000899999999 @@ -5650,8 +5650,8 @@ conf64 1.61121417569 0 81.899994 2.40000899999999 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf65 1.72081949906 0 82.360001 1.7099985000000046 @@ -5737,8 +5737,8 @@ conf65 1.72081949906 0 82.360001 1.7099985000000046 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf66 1.64082797814 0 82.240005 1.8899925000000053 @@ -5824,8 +5824,8 @@ conf66 1.64082797814 0 82.240005 1.8899925000000053 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf67 1.67470064441 0 81.500008 2.999988000000009 @@ -5911,8 +5911,8 @@ conf67 1.67470064441 0 81.500008 2.999988000000009 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf68 1.69663672592 0 82.040001 2.1899984999999944 @@ -5998,8 +5998,8 @@ conf68 1.69663672592 0 82.040001 2.1899984999999944 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf69 1.72415799543 0 81.860001 2.4599985000000046 @@ -6085,8 +6085,8 @@ conf69 1.72415799543 0 81.860001 2.4599985000000046 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf70 1.67470064441 0 82.639999 1.2900014999999954 @@ -6172,8 +6172,8 @@ conf70 1.67470064441 0 82.639999 1.2900014999999954 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf71 1.67470064441 0 82.639999 1.2900014999999954 @@ -6259,8 +6259,8 @@ conf71 1.67470064441 0 82.639999 1.2900014999999954 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf72 1.72309639446 0 82.379997 1.6800044999999955 @@ -6346,8 +6346,8 @@ conf72 1.72309639446 0 82.379997 1.6800044999999955 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf73 1.67786242745 0 82.180008 1.9799879999999987 @@ -6433,8 +6433,8 @@ conf73 1.67786242745 0 82.180008 1.9799879999999987 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf74 1.72332215751 0 82.759995 1.1100074999999947 @@ -6520,8 +6520,8 @@ conf74 1.72332215751 0 82.759995 1.1100074999999947 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf75 1.7199868941 0 82.099998 2.100003000000001 @@ -6607,8 +6607,8 @@ conf75 1.7199868941 0 82.099998 2.100003000000001 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf76 1.56621582711 0 82.419998 1.6200029999999899 @@ -6694,8 +6694,8 @@ conf76 1.56621582711 0 82.419998 1.6200029999999899 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf77 1.72332215751 0 82.579994 1.3800090000000012 @@ -6781,8 +6781,8 @@ conf77 1.72332215751 0 82.579994 1.3800090000000012 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf78 1.7216529105 0 82.400002 1.649996999999999 @@ -6868,8 +6868,8 @@ conf78 1.7216529105 0 82.400002 1.649996999999999 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf79 1.72332215751 0 82.540001 1.4399984999999944 @@ -6955,8 +6955,8 @@ conf79 1.72332215751 0 82.540001 1.4399984999999944 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf80 1.72373997515 0 81.580002 2.87999700000001 @@ -7042,8 +7042,8 @@ conf80 1.72373997515 0 81.580002 2.87999700000001 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf81 1.72373997515 0 82.0 2.25 @@ -7129,8 +7129,8 @@ conf81 1.72373997515 0 82.0 2.25 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf82 1.70069521045 0 82.18 1.9799999999999898 @@ -7216,8 +7216,8 @@ conf82 1.70069521045 0 82.18 1.9799999999999898 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf83 1.72415799543 0 82.020004 2.219994 @@ -7303,8 +7303,8 @@ conf83 1.72415799543 0 82.020004 2.219994 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf84 1.72415799543 0 81.860001 2.4599985000000046 @@ -7390,8 +7390,8 @@ conf84 1.72415799543 0 81.860001 2.4599985000000046 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf85 1.72309639446 0 82.199997 1.9500045000000057 @@ -7477,8 +7477,8 @@ conf85 1.72309639446 0 82.199997 1.9500045000000057 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf86 1.56621582711 0 81.659996 2.76000599999999 @@ -7564,8 +7564,8 @@ conf86 1.56621582711 0 81.659996 2.76000599999999 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf87 1.66865728222 0 82.080002 2.12999700000001 @@ -7651,8 +7651,8 @@ conf87 1.66865728222 0 82.080002 2.12999700000001 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf88 1.67391205968 0 81.659996 2.76000599999999 @@ -7738,8 +7738,8 @@ conf88 1.67391205968 0 81.659996 2.76000599999999 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf89 1.69641790012 0 82.639999 1.2900014999999954 @@ -7825,8 +7825,8 @@ conf89 1.69641790012 0 82.639999 1.2900014999999954 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf90 1.67470064441 0 82.379997 1.6800044999999955 @@ -7912,8 +7912,8 @@ conf90 1.67470064441 0 82.379997 1.6800044999999955 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf91 1.70069521045 0 82.139999 2.0400014999999954 @@ -7999,8 +7999,8 @@ conf91 1.70069521045 0 82.139999 2.0400014999999954 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf92 1.67430625919 0 81.680008 2.7299879999999987 @@ -8086,8 +8086,8 @@ conf92 1.67430625919 0 81.680008 2.7299879999999987 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf93 1.72267888872 0 81.580002 2.87999700000001 @@ -8173,8 +8173,8 @@ conf93 1.72267888872 0 81.580002 2.87999700000001 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf94 1.56794285911 0 81.68 2.7299999999999898 @@ -8260,8 +8260,8 @@ conf94 1.56794285911 0 81.68 2.7299999999999898 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf95 1.72081949906 0 82.159996 2.01000599999999 @@ -8347,8 +8347,8 @@ conf95 1.72081949906 0 82.159996 2.01000599999999 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf96 1.70069521045 0 82.18 1.9799999999999898 @@ -8434,8 +8434,8 @@ conf96 1.70069521045 0 82.18 1.9799999999999898 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf97 1.66167014075 0 82.880005 0.9299925000000044 @@ -8521,8 +8521,8 @@ conf97 1.66167014075 0 82.880005 0.9299925000000044 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf98 1.72248712959 0 82.120003 2.0699955000000045 @@ -8608,8 +8608,8 @@ conf98 1.72248712959 0 82.120003 2.0699955000000045 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf99 1.72415799543 0 81.800003 2.5499954999999943 @@ -8695,8 +8695,8 @@ conf99 1.72415799543 0 81.800003 2.5499954999999943 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf100 1.72415799543 0 82.020004 2.219994 @@ -8782,6 +8782,6 @@ conf100 1.72415799543 0 82.020004 2.219994 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_pareto_confs_batch220.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_pareto_confs_batch220.txt index 1fef0e3d2c..66833d06b3 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_pareto_confs_batch220.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_pareto_confs_batch220.txt @@ -169,8 +169,8 @@ conf1 1.66592032533 0 82.900002 0.899996999999999 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf2 1.66766290747 0 82.860001 0.9599985000000046 @@ -256,8 +256,8 @@ conf2 1.66766290747 0 82.860001 0.9599985000000046 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf3 1.66670139642 0 82.900002 0.899996999999999 @@ -343,8 +343,8 @@ conf3 1.66670139642 0 82.900002 0.899996999999999 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf4 1.66748320027 0 82.940002 0.8399969999999897 @@ -430,8 +430,8 @@ conf4 1.66748320027 0 82.940002 0.8399969999999897 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf5 1.66826573791 0 82.940002 0.8399969999999897 @@ -517,8 +517,8 @@ conf5 1.66826573791 0 82.940002 0.8399969999999897 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf6 1.62488529847 0 82.979996 0.7800060000000002 @@ -604,8 +604,8 @@ conf6 1.62488529847 0 82.979996 0.7800060000000002 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf7 1.66748320027 0 83.080002 0.6299970000000101 @@ -691,8 +691,8 @@ conf7 1.66748320027 0 83.080002 0.6299970000000101 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf8 1.62414291979 0 82.979996 0.7800060000000002 @@ -778,8 +778,8 @@ conf8 1.62414291979 0 82.979996 0.7800060000000002 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf9 1.63389893891 0 83.12001 0.5699850000000097 @@ -865,8 +865,8 @@ conf9 1.63389893891 0 83.12001 0.5699850000000097 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf10 1.63332068534 0 83.119995 0.5700074999999956 @@ -952,8 +952,8 @@ conf10 1.63332068534 0 83.119995 0.5700074999999956 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf11 1.66826573791 0 83.080002 0.6299970000000101 @@ -1039,8 +1039,8 @@ conf11 1.66826573791 0 83.080002 0.6299970000000101 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf12 1.66345526231 0 83.019997 0.7200044999999946 @@ -1126,8 +1126,8 @@ conf12 1.66345526231 0 83.019997 0.7200044999999946 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf13 1.62414291979 0 82.860001 0.9599985000000046 @@ -1213,8 +1213,8 @@ conf13 1.62414291979 0 82.860001 0.9599985000000046 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf14 1.66867727615 0 83.219994 0.6800060000000002 @@ -1300,8 +1300,8 @@ conf14 1.66867727615 0 83.219994 0.6800060000000002 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf15 1.6939272698 0 82.360001 1.7099985000000046 @@ -1387,8 +1387,8 @@ conf15 1.6939272698 0 82.360001 1.7099985000000046 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf16 1.69125907336 0 82.979996 0.7800060000000002 @@ -1474,8 +1474,8 @@ conf16 1.69125907336 0 82.979996 0.7800060000000002 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf17 1.66267723003 0 82.880005 0.9299925000000044 @@ -1561,8 +1561,8 @@ conf17 1.66267723003 0 82.880005 0.9299925000000044 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf18 1.69414545349 0 82.259995 1.8600074999999947 @@ -1648,8 +1648,8 @@ conf18 1.69414545349 0 82.259995 1.8600074999999947 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf19 1.66846560269 0 83.039993 0.6900105000000067 @@ -1735,8 +1735,8 @@ conf19 1.66846560269 0 83.039993 0.6900105000000067 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf20 1.69333845447 0 82.360001 1.7099985000000046 @@ -1822,8 +1822,8 @@ conf20 1.69333845447 0 82.360001 1.7099985000000046 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf21 1.68774460395 0 82.239998 1.890003 @@ -1909,8 +1909,8 @@ conf21 1.68774460395 0 82.239998 1.890003 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf22 1.69063951413 0 82.899994 0.9000089999999901 @@ -1996,8 +1996,8 @@ conf22 1.69063951413 0 82.899994 0.9000089999999901 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf23 1.66867727615 0 83.659996 0.24000399999999333 @@ -2083,8 +2083,8 @@ conf23 1.66867727615 0 83.659996 0.24000399999999333 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf24 1.69454924151 0 82.519997 1.4700044999999946 @@ -2170,8 +2170,8 @@ conf24 1.69454924151 0 82.519997 1.4700044999999946 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf25 1.68718051806 0 83.020004 0.7199939999999998 @@ -2257,8 +2257,8 @@ conf25 1.68718051806 0 83.020004 0.7199939999999998 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf26 1.69495322205 0 82.379997 1.6800044999999955 @@ -2344,8 +2344,8 @@ conf26 1.69495322205 0 82.379997 1.6800044999999955 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf27 1.66267723003 0 83.100006 0.5999910000000099 @@ -2431,8 +2431,8 @@ conf27 1.66267723003 0 83.100006 0.5999910000000099 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf28 1.66867727615 0 83.119995 0.5700074999999956 @@ -2518,8 +2518,8 @@ conf28 1.66867727615 0 83.119995 0.5700074999999956 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf29 1.69374185785 0 82.239998 1.890003 @@ -2605,8 +2605,8 @@ conf29 1.69374185785 0 82.239998 1.890003 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf30 1.64669056053 0 82.880005 0.9299925000000044 @@ -2692,8 +2692,8 @@ conf30 1.64669056053 0 82.880005 0.9299925000000044 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf31 1.69166148649 0 82.800003 1.0499954999999943 @@ -2779,8 +2779,8 @@ conf31 1.69166148649 0 82.800003 1.0499954999999943 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf32 1.66867727615 0 83.219994 0.6800060000000002 @@ -2866,8 +2866,8 @@ conf32 1.66867727615 0 83.219994 0.6800060000000002 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf33 1.6939272698 0 82.240005 1.8899925000000053 @@ -2953,8 +2953,8 @@ conf33 1.6939272698 0 82.240005 1.8899925000000053 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf34 1.69290255882 0 82.259995 1.8600074999999947 @@ -3040,8 +3040,8 @@ conf34 1.69290255882 0 82.259995 1.8600074999999947 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf35 1.64980944683 0 82.32 1.7700000000000102 @@ -3127,8 +3127,8 @@ conf35 1.64980944683 0 82.32 1.7700000000000102 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf36 1.66750316607 0 83.379997 0.520002999999997 @@ -3214,8 +3214,8 @@ conf36 1.66750316607 0 83.379997 0.520002999999997 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf37 1.6384977922 0 83.060005 0.6599924999999942 @@ -3301,8 +3301,8 @@ conf37 1.6384977922 0 83.060005 0.6599924999999942 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf38 1.69312047859 0 82.219994 1.9200090000000003 @@ -3388,8 +3388,8 @@ conf38 1.69312047859 0 82.219994 1.9200090000000003 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf39 1.66867727615 0 82.720001 1.1699985000000055 @@ -3475,8 +3475,8 @@ conf39 1.66867727615 0 82.720001 1.1699985000000055 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf40 1.67188008661 0 83.060005 0.6599924999999942 @@ -3562,8 +3562,8 @@ conf40 1.67188008661 0 83.060005 0.6599924999999942 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf41 1.68426507974 0 82.300003 1.7999954999999943 @@ -3649,8 +3649,8 @@ conf41 1.68426507974 0 82.300003 1.7999954999999943 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf42 1.67470064441 0 81.800003 2.5499954999999943 @@ -3736,8 +3736,8 @@ conf42 1.67470064441 0 81.800003 2.5499954999999943 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf43 1.72373997515 0 82.240005 1.8899925000000053 @@ -3823,8 +3823,8 @@ conf43 1.72373997515 0 82.240005 1.8899925000000053 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf44 1.71832409894 0 82.400002 1.649996999999999 @@ -3910,8 +3910,8 @@ conf44 1.71832409894 0 82.400002 1.649996999999999 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf45 1.70069521045 0 83.080002 0.6299970000000101 @@ -3997,8 +3997,8 @@ conf45 1.70069521045 0 83.080002 0.6299970000000101 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf46 1.72248712959 0 81.920006 2.369990999999999 @@ -4084,8 +4084,8 @@ conf46 1.72248712959 0 81.920006 2.369990999999999 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf47 1.72415799543 0 81.539993 2.9400105000000067 @@ -4171,8 +4171,8 @@ conf47 1.72415799543 0 81.539993 2.9400105000000067 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf48 1.69663672592 0 82.559998 1.4100030000000103 @@ -4258,8 +4258,8 @@ conf48 1.69663672592 0 82.559998 1.4100030000000103 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf49 1.7199868941 0 82.320007 1.769989499999994 @@ -4345,8 +4345,8 @@ conf49 1.7199868941 0 82.320007 1.769989499999994 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf50 1.72415799543 0 82.240005 1.8899925000000053 @@ -4432,8 +4432,8 @@ conf50 1.72415799543 0 82.240005 1.8899925000000053 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf51 1.67369905614 0 82.440002 1.5899969999999897 @@ -4519,8 +4519,8 @@ conf51 1.67369905614 0 82.440002 1.5899969999999897 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf52 1.69988196097 0 82.459999 1.5600015000000056 @@ -4606,8 +4606,8 @@ conf52 1.69988196097 0 82.459999 1.5600015000000056 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf53 1.72332215751 0 82.540001 1.4399984999999944 @@ -4693,8 +4693,8 @@ conf53 1.72332215751 0 82.540001 1.4399984999999944 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf54 1.66577380924 0 82.819992 1.0200120000000013 @@ -4780,8 +4780,8 @@ conf54 1.66577380924 0 82.819992 1.0200120000000013 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf55 1.72415799543 0 81.599998 2.850003000000001 @@ -4867,8 +4867,8 @@ conf55 1.72415799543 0 81.599998 2.850003000000001 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf56 1.72081949906 0 82.360001 1.7099985000000046 @@ -4954,8 +4954,8 @@ conf56 1.72081949906 0 82.360001 1.7099985000000046 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf57 1.67470064441 0 81.500008 2.999988000000009 @@ -5041,8 +5041,8 @@ conf57 1.67470064441 0 81.500008 2.999988000000009 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf58 1.69663672592 0 82.040001 2.1899984999999944 @@ -5128,8 +5128,8 @@ conf58 1.69663672592 0 82.040001 2.1899984999999944 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf59 1.72415799543 0 81.860001 2.4599985000000046 @@ -5215,8 +5215,8 @@ conf59 1.72415799543 0 81.860001 2.4599985000000046 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf60 1.67470064441 0 82.639999 1.2900014999999954 @@ -5302,8 +5302,8 @@ conf60 1.67470064441 0 82.639999 1.2900014999999954 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf61 1.67470064441 0 82.639999 1.2900014999999954 @@ -5389,8 +5389,8 @@ conf61 1.67470064441 0 82.639999 1.2900014999999954 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf62 1.72309639446 0 82.379997 1.6800044999999955 @@ -5476,8 +5476,8 @@ conf62 1.72309639446 0 82.379997 1.6800044999999955 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf63 1.67786242745 0 82.180008 1.9799879999999987 @@ -5563,8 +5563,8 @@ conf63 1.67786242745 0 82.180008 1.9799879999999987 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf64 1.72332215751 0 82.759995 1.1100074999999947 @@ -5650,8 +5650,8 @@ conf64 1.72332215751 0 82.759995 1.1100074999999947 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf65 1.7199868941 0 82.099998 2.100003000000001 @@ -5737,8 +5737,8 @@ conf65 1.7199868941 0 82.099998 2.100003000000001 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf66 1.72332215751 0 82.579994 1.3800090000000012 @@ -5824,8 +5824,8 @@ conf66 1.72332215751 0 82.579994 1.3800090000000012 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf67 1.7216529105 0 82.400002 1.649996999999999 @@ -5911,8 +5911,8 @@ conf67 1.7216529105 0 82.400002 1.649996999999999 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf68 1.72332215751 0 82.540001 1.4399984999999944 @@ -5998,8 +5998,8 @@ conf68 1.72332215751 0 82.540001 1.4399984999999944 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf69 1.72373997515 0 81.580002 2.87999700000001 @@ -6085,8 +6085,8 @@ conf69 1.72373997515 0 81.580002 2.87999700000001 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf70 1.72373997515 0 82.0 2.25 @@ -6172,8 +6172,8 @@ conf70 1.72373997515 0 82.0 2.25 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf71 1.70069521045 0 82.18 1.9799999999999898 @@ -6259,8 +6259,8 @@ conf71 1.70069521045 0 82.18 1.9799999999999898 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf72 1.72415799543 0 82.020004 2.219994 @@ -6346,8 +6346,8 @@ conf72 1.72415799543 0 82.020004 2.219994 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf73 1.72415799543 0 81.860001 2.4599985000000046 @@ -6433,8 +6433,8 @@ conf73 1.72415799543 0 81.860001 2.4599985000000046 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf74 1.72309639446 0 82.199997 1.9500045000000057 @@ -6520,8 +6520,8 @@ conf74 1.72309639446 0 82.199997 1.9500045000000057 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf75 1.69641790012 0 82.639999 1.2900014999999954 @@ -6607,8 +6607,8 @@ conf75 1.69641790012 0 82.639999 1.2900014999999954 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf76 1.67470064441 0 82.379997 1.6800044999999955 @@ -6694,8 +6694,8 @@ conf76 1.67470064441 0 82.379997 1.6800044999999955 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf77 1.70069521045 0 82.139999 2.0400014999999954 @@ -6781,8 +6781,8 @@ conf77 1.70069521045 0 82.139999 2.0400014999999954 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf78 1.67430625919 0 81.680008 2.7299879999999987 @@ -6868,8 +6868,8 @@ conf78 1.67430625919 0 81.680008 2.7299879999999987 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf79 1.72267888872 0 81.580002 2.87999700000001 @@ -6955,8 +6955,8 @@ conf79 1.72267888872 0 81.580002 2.87999700000001 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf80 1.72081949906 0 82.159996 2.01000599999999 @@ -7042,8 +7042,8 @@ conf80 1.72081949906 0 82.159996 2.01000599999999 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf81 1.70069521045 0 82.18 1.9799999999999898 @@ -7129,8 +7129,8 @@ conf81 1.70069521045 0 82.18 1.9799999999999898 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf82 1.66167014075 0 82.880005 0.9299925000000044 @@ -7216,8 +7216,8 @@ conf82 1.66167014075 0 82.880005 0.9299925000000044 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf83 1.72248712959 0 82.120003 2.0699955000000045 @@ -7303,8 +7303,8 @@ conf83 1.72248712959 0 82.120003 2.0699955000000045 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf84 1.72415799543 0 81.800003 2.5499954999999943 @@ -7390,8 +7390,8 @@ conf84 1.72415799543 0 81.800003 2.5499954999999943 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf85 1.72415799543 0 82.020004 2.219994 @@ -7477,6 +7477,6 @@ conf85 1.72415799543 0 82.020004 2.219994 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_multi.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_multi.txt index 2ab5aaa91f..baffc18545 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_multi.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_multi.txt @@ -170,7 +170,7 @@ conf2 2.98991537361 0 83.386665875 0.5133341249999944 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf3 3.88159289347 0 83.2783331 0.6216669000000025 @@ -257,7 +257,7 @@ conf3 3.88159289347 0 83.2783331 0.6216669000000025 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf4 4.14749473048 0 83.220000325 0.6799996749999991 @@ -344,7 +344,7 @@ conf4 4.14749473048 0 83.220000325 0.6799996749999991 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf5 4.4175200707 0 83.219999875 0.6800001250000008 @@ -431,7 +431,7 @@ conf5 4.4175200707 0 83.219999875 0.6800001250000008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf6 4.43502212401 0 83.155834675 0.5162479875000088 @@ -518,7 +518,7 @@ conf6 4.43502212401 0 83.155834675 0.5162479875000088 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf7 4.10832403497 0 83.103333275 0.5950000875000043 @@ -605,7 +605,7 @@ conf7 4.10832403497 0 83.103333275 0.5950000875000043 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf8 3.31453105661 0 82.59083295 1.3637505749999903 @@ -692,7 +692,7 @@ conf8 3.31453105661 0 82.59083295 1.3637505749999903 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf9 3.52220799908 0 82.4716658 1.542501300000005 @@ -779,7 +779,7 @@ conf9 3.52220799908 0 82.4716658 1.542501300000005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf10 3.44814122333 0 82.51916615 1.4712507749999943 @@ -866,7 +866,7 @@ conf10 3.44814122333 0 82.51916615 1.4712507749999943 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf11 3.02800023045 0 82.64999965 1.275000525000003 @@ -953,7 +953,7 @@ conf11 3.02800023045 0 82.64999965 1.275000525000003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf12 3.68207420915 0 82.448332775 1.5775008375000041 @@ -1040,7 +1040,7 @@ conf12 3.68207420915 0 82.448332775 1.5775008375000041 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf13 3.862916011 0 82.7708336 1.0937495999999953 @@ -1127,7 +1127,7 @@ conf13 3.862916011 0 82.7708336 1.0937495999999953 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf14 3.7573272945 0 82.422500075 1.6162498874999969 @@ -1214,7 +1214,7 @@ conf14 3.7573272945 0 82.422500075 1.6162498874999969 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf15 3.93066025121 0 82.594167 1.3587495000000018 @@ -1301,7 +1301,7 @@ conf15 3.93066025121 0 82.594167 1.3587495000000018 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf16 3.87800695966 0 82.430000125 1.6049998124999902 @@ -1388,7 +1388,7 @@ conf16 3.87800695966 0 82.430000125 1.6049998124999902 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf17 3.68207420915 0 82.56333275 1.405000874999999 @@ -1475,7 +1475,7 @@ conf17 3.68207420915 0 82.56333275 1.405000874999999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf18 3.22097285505 0 83.564167475 0.33583252499999505 @@ -1562,7 +1562,7 @@ conf18 3.22097285505 0 83.564167475 0.33583252499999505 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf19 3.82500219093 0 82.9275003 0.8587495499999918 @@ -1649,7 +1649,7 @@ conf19 3.82500219093 0 82.9275003 0.8587495499999918 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf20 3.90066717558 0 82.329165975 1.7562510375000073 @@ -1736,7 +1736,7 @@ conf20 3.90066717558 0 82.329165975 1.7562510375000073 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf21 3.86501599073 0 83.115000925 0.5774986124999941 @@ -1823,7 +1823,7 @@ conf21 3.86501599073 0 83.115000925 0.5774986124999941 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf22 3.40096875474 0 82.72416595 1.1637510750000004 @@ -1910,7 +1910,7 @@ conf22 3.40096875474 0 82.72416595 1.1637510750000004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf23 3.5538161637 0 82.9700005 0.7949992500000036 @@ -1997,7 +1997,7 @@ conf23 3.5538161637 0 82.9700005 0.7949992500000036 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf24 3.17344943111 0 83.00083265 0.74875102499999 @@ -2084,7 +2084,7 @@ conf24 3.17344943111 0 83.00083265 0.74875102499999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf25 2.76788477576 0 82.447499725 1.5787504124999998 @@ -2171,7 +2171,7 @@ conf25 2.76788477576 0 82.447499725 1.5787504124999998 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf26 3.92397614204 0 83.174999825 0.48750026249999934 @@ -2258,7 +2258,7 @@ conf26 3.92397614204 0 83.174999825 0.48750026249999934 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf27 3.4092100078 0 83.1424999 0.5362501499999937 @@ -2345,7 +2345,7 @@ conf27 3.4092100078 0 83.1424999 0.5362501499999937 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf28 3.03961006636 0 82.704167175 1.1937492375000076 @@ -2432,7 +2432,7 @@ conf28 3.03961006636 0 82.704167175 1.1937492375000076 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf29 3.62973730797 0 83.085833325 0.6212500125000062 @@ -2519,7 +2519,7 @@ conf29 3.62973730797 0 83.085833325 0.6212500125000062 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf30 2.81140054286 0 82.4325003 1.6012495499999986 @@ -2605,8 +2605,8 @@ conf30 2.81140054286 0 82.4325003 1.6012495499999986 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf31 3.18575701105 0 82.52833345 1.4574998249999922 @@ -2693,7 +2693,7 @@ conf31 3.18575701105 0 82.52833345 1.4574998249999922 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf32 3.42595978009 0 82.7708333 1.09375004999999 @@ -2780,7 +2780,7 @@ conf32 3.42595978009 0 82.7708333 1.09375004999999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf33 3.17255385439 0 82.7233329 1.165000649999996 @@ -2867,7 +2867,7 @@ conf33 3.17255385439 0 82.7233329 1.165000649999996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf34 3.6391339197 0 82.831667325 1.002499012500003 @@ -2954,7 +2954,7 @@ conf34 3.6391339197 0 82.831667325 1.002499012500003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf35 2.72368244288 0 83.034168075 0.698747887500005 @@ -3041,7 +3041,7 @@ conf35 2.72368244288 0 83.034168075 0.698747887500005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf36 3.44714364594 0 82.539999575 1.440000637500006 @@ -3128,7 +3128,7 @@ conf36 3.44714364594 0 82.539999575 1.440000637500006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf37 3.85171694927 0 83.137500575 0.5437491374999937 @@ -3215,7 +3215,7 @@ conf37 3.85171694927 0 83.137500575 0.5437491374999937 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf38 3.02151032351 0 83.1958331 0.4562503499999977 @@ -3302,7 +3302,7 @@ conf38 3.02151032351 0 83.1958331 0.4562503499999977 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf39 3.92280583455 0 83.017499875 0.7237501875000021 @@ -3389,7 +3389,7 @@ conf39 3.92280583455 0 83.017499875 0.7237501875000021 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf40 4.15840004076 0 82.141666525 2.0375002124999924 @@ -3476,7 +3476,7 @@ conf40 4.15840004076 0 82.141666525 2.0375002124999924 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf41 2.89589235375 0 82.634166725 1.2987499125 @@ -3563,7 +3563,7 @@ conf41 2.89589235375 0 82.634166725 1.2987499125 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf42 6.16453028593 0 81.9308327 2.3537509500000056 @@ -3650,7 +3650,7 @@ conf42 6.16453028593 0 81.9308327 2.3537509500000056 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf43 4.7463107647 0 81.9616666 2.3075000999999986 @@ -3737,7 +3737,7 @@ conf43 4.7463107647 0 81.9616666 2.3075000999999986 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf44 5.84575268801 0 81.983333575 2.2749996374999952 @@ -3824,7 +3824,7 @@ conf44 5.84575268801 0 81.983333575 2.2749996374999952 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf45 3.45773167067 0 82.297500375 1.8037494375000023 @@ -3911,7 +3911,7 @@ conf45 3.45773167067 0 82.297500375 1.8037494375000023 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf46 4.34036485844 0 82.463333875 1.5549991874999947 @@ -3998,7 +3998,7 @@ conf46 4.34036485844 0 82.463333875 1.5549991874999947 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf47 4.75207062649 0 81.74500045 2.6324993249999906 @@ -4085,7 +4085,7 @@ conf47 4.75207062649 0 81.74500045 2.6324993249999906 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf48 6.09000225926 0 81.9883331 2.2675003499999917 @@ -4172,7 +4172,7 @@ conf48 6.09000225926 0 81.9883331 2.2675003499999917 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf49 5.25532208128 0 81.92083345 2.3687498249999948 @@ -4259,7 +4259,7 @@ conf49 5.25532208128 0 81.92083345 2.3687498249999948 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf50 4.35262313423 0 82.356666825 1.7149997624999926 @@ -4346,7 +4346,7 @@ conf50 4.35262313423 0 82.356666825 1.7149997624999926 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf51 5.25144034242 0 81.9350005 2.3474992499999985 @@ -4433,7 +4433,7 @@ conf51 5.25144034242 0 81.9350005 2.3474992499999985 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf52 5.32967222406 0 81.9616671 2.3074993500000005 @@ -4520,7 +4520,7 @@ conf52 5.32967222406 0 81.9616671 2.3074993500000005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf53 4.13210954206 0 83.056666875 0.6649996874999928 @@ -4607,7 +4607,7 @@ conf53 4.13210954206 0 83.056666875 0.6649996874999928 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf54 3.93967771859 0 81.763332525 2.6050012125000066 @@ -4694,7 +4694,7 @@ conf54 3.93967771859 0 81.763332525 2.6050012125000066 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf55 4.13872474867 0 82.631666775 1.302499837500001 @@ -4781,7 +4781,7 @@ conf55 4.13872474867 0 82.631666775 1.302499837500001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf56 2.7690518229 0 81.93666605 2.345000925000001 @@ -4867,8 +4867,8 @@ conf56 2.7690518229 0 81.93666605 2.345000925000001 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf57 5.60283597265 0 81.9233338 2.364999300000008 @@ -4955,7 +4955,7 @@ conf57 5.60283597265 0 81.9233338 2.364999300000008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf58 4.45486021161 0 82.0608328 2.1587508 @@ -5042,7 +5042,7 @@ conf58 4.45486021161 0 82.0608328 2.1587508 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf59 4.22738367053 0 82.226667075 1.909999387500008 @@ -5129,7 +5129,7 @@ conf59 4.22738367053 0 82.226667075 1.909999387500008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf60 6.10852785257 0 81.959167325 2.3112490125000065 @@ -5216,7 +5216,7 @@ conf60 6.10852785257 0 81.959167325 2.3112490125000065 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf61 4.98692149992 0 81.822500025 2.516249962500005 @@ -5303,7 +5303,7 @@ conf61 4.98692149992 0 81.822500025 2.516249962500005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf62 6.11662384336 0 81.9808335 2.278749749999996 @@ -5390,7 +5390,7 @@ conf62 6.11662384336 0 81.9808335 2.278749749999996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf63 5.97727027928 0 82.0224998 2.2162502999999916 @@ -5477,7 +5477,7 @@ conf63 5.97727027928 0 82.0224998 2.2162502999999916 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf64 2.93382347771 0 81.85416565 2.468751525000002 @@ -5564,7 +5564,7 @@ conf64 2.93382347771 0 81.85416565 2.468751525000002 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf65 5.54950778131 0 81.73833355 2.6424996750000105 @@ -5651,7 +5651,7 @@ conf65 5.54950778131 0 81.73833355 2.6424996750000105 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf66 6.11662384336 0 81.889999075 2.4150013874999914 @@ -5738,7 +5738,7 @@ conf66 6.11662384336 0 81.889999075 2.4150013874999914 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf67 3.82767121119 0 82.26583335 1.8512499750000089 @@ -5825,7 +5825,7 @@ conf67 3.82767121119 0 82.26583335 1.8512499750000089 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf68 4.99324893801 0 81.816667 2.524999500000007 @@ -5912,7 +5912,7 @@ conf68 4.99324893801 0 81.816667 2.524999500000007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf69 5.74180480491 0 81.889166525 2.4162502125000103 @@ -5999,7 +5999,7 @@ conf69 5.74180480491 0 81.889166525 2.4162502125000103 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf70 2.8416794212 0 82.981666775 0.7774998375000095 @@ -6085,8 +6085,8 @@ conf70 2.8416794212 0 82.981666775 0.7774998375000095 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf71 5.63492586502 0 81.993333575 2.259999637500009 @@ -6173,7 +6173,7 @@ conf71 5.63492586502 0 81.993333575 2.259999637500009 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf72 3.83421974764 0 83.207499825 0.6925001750000007 @@ -6260,7 +6260,7 @@ conf72 3.83421974764 0 83.207499825 0.6925001750000007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf73 1.87482212142 0 82.811667125 1.0324993125000006 @@ -6346,8 +6346,8 @@ conf73 1.87482212142 0 82.811667125 1.0324993125000006 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf74 4.43826460769 0 81.75166725 2.6224991250000045 @@ -6434,7 +6434,7 @@ conf74 4.43826460769 0 81.75166725 2.6224991250000045 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf75 5.03123041946 0 82.306666375 1.790000437499991 @@ -6521,7 +6521,7 @@ conf75 5.03123041946 0 82.306666375 1.790000437499991 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf76 4.2701321542 0 82.1975002 1.9537497000000101 @@ -6608,7 +6608,7 @@ conf76 4.2701321542 0 82.1975002 1.9537497000000101 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf77 3.8174916299 0 83.4608337 0.4391663000000051 @@ -6695,7 +6695,7 @@ conf77 3.8174916299 0 83.4608337 0.4391663000000051 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf78 3.54707867194 0 83.409999875 0.4900001250000031 @@ -6782,7 +6782,7 @@ conf78 3.54707867194 0 83.409999875 0.4900001250000031 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf79 3.90991089555 0 83.46999955 0.43000045000000287 @@ -6869,7 +6869,7 @@ conf79 3.90991089555 0 83.46999955 0.43000045000000287 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf80 3.67974499409 0 83.3833332 0.5166668000000044 @@ -6956,7 +6956,7 @@ conf80 3.67974499409 0 83.3833332 0.5166668000000044 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf81 2.82550849059 0 83.28583315 0.614166849999998 @@ -7043,7 +7043,7 @@ conf81 2.82550849059 0 83.28583315 0.614166849999998 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf82 2.77356236628 0 83.60833335 0.2916666500000048 @@ -7130,7 +7130,7 @@ conf82 2.77356236628 0 83.60833335 0.2916666500000048 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf83 2.54319644535 0 83.399166475 0.5008335249999988 @@ -7216,8 +7216,8 @@ conf83 2.54319644535 0 83.399166475 0.5008335249999988 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf84 3.73175797849 0 83.50083335 0.3991666500000065 @@ -7304,7 +7304,7 @@ conf84 3.73175797849 0 83.50083335 0.3991666500000065 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf85 3.36226524472 0 83.472500175 0.4274998250000067 @@ -7391,7 +7391,7 @@ conf85 3.36226524472 0 83.472500175 0.4274998250000067 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf86 3.00745254477 0 83.33583355 0.5641664499999962 @@ -7478,7 +7478,7 @@ conf86 3.00745254477 0 83.33583355 0.5641664499999962 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf87 3.95482984539 0 83.289165675 0.6108343249999933 @@ -7565,7 +7565,7 @@ conf87 3.95482984539 0 83.289165675 0.6108343249999933 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf88 2.89889994154 0 83.269167175 0.6308328249999932 @@ -7652,7 +7652,7 @@ conf88 2.89889994154 0 83.269167175 0.6308328249999932 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf89 3.60299625636 0 83.3791664 0.5208335999999975 @@ -7739,7 +7739,7 @@ conf89 3.60299625636 0 83.3791664 0.5208335999999975 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf90 3.13621575975 0 83.54083385 0.3591661500000015 @@ -7826,7 +7826,7 @@ conf90 3.13621575975 0 83.54083385 0.3591661500000015 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf91 2.61388509814 0 83.457500225 0.44249977499999604 @@ -7913,7 +7913,7 @@ conf91 2.61388509814 0 83.457500225 0.44249977499999604 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf92 4.05930607617 0 83.1716666 0.492500100000008 @@ -8000,7 +8000,7 @@ conf92 4.05930607617 0 83.1716666 0.492500100000008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf93 3.73175797849 0 83.575000225 0.32499977500000343 @@ -8087,7 +8087,7 @@ conf93 3.73175797849 0 83.575000225 0.32499977500000343 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf94 3.76274140853 0 83.47916685 0.42083315000000143 @@ -8174,7 +8174,7 @@ conf94 3.76274140853 0 83.47916685 0.42083315000000143 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf95 3.20332969056 0 83.850833275 0.04916672500000063 @@ -8261,7 +8261,7 @@ conf95 3.20332969056 0 83.850833275 0.04916672500000063 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf96 3.64570944225 0 83.45249935 0.4475006500000035 @@ -8348,7 +8348,7 @@ conf96 3.64570944225 0 83.45249935 0.4475006500000035 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf97 3.85035669633 0 83.5608333 0.33916670000000126 @@ -8435,7 +8435,7 @@ conf97 3.85035669633 0 83.5608333 0.33916670000000126 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf98 3.54829526922 0 83.25249975 0.6475002500000017 @@ -8522,7 +8522,7 @@ conf98 3.54829526922 0 83.25249975 0.6475002500000017 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf99 3.61899339422 0 83.278334075 0.6216659249999964 @@ -8609,7 +8609,7 @@ conf99 3.61899339422 0 83.278334075 0.6216659249999964 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf100 3.28254525212 0 83.489167025 0.4108329749999996 @@ -8696,7 +8696,7 @@ conf100 3.28254525212 0 83.489167025 0.4108329749999996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf101 3.50816224551 0 83.252499725 0.6475002749999931 @@ -8783,7 +8783,7 @@ conf101 3.50816224551 0 83.252499725 0.6475002749999931 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf102 4.10549146346 0 83.3416668 0.5583332000000013 @@ -8870,7 +8870,7 @@ conf102 4.10549146346 0 83.3416668 0.5583332000000013 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf103 3.36715352889 0 83.584166725 0.31583327499999714 @@ -8957,7 +8957,7 @@ conf103 3.36715352889 0 83.584166725 0.31583327499999714 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf104 3.1088246435 0 83.1591665 0.5112502500000033 @@ -9044,7 +9044,7 @@ conf104 3.1088246435 0 83.1591665 0.5112502500000033 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf105 3.47488338292 0 83.388333125 0.5116668749999974 @@ -9131,7 +9131,7 @@ conf105 3.47488338292 0 83.388333125 0.5116668749999974 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf106 3.59538418566 0 83.5300007 0.3699992999999978 @@ -9218,7 +9218,7 @@ conf106 3.59538418566 0 83.5300007 0.3699992999999978 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf107 3.97286473272 0 83.537499975 0.3625000249999971 @@ -9305,7 +9305,7 @@ conf107 3.97286473272 0 83.537499975 0.3625000249999971 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf108 3.8174916299 0 83.451667075 0.4483329249999969 @@ -9392,7 +9392,7 @@ conf108 3.8174916299 0 83.451667075 0.4483329249999969 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf109 3.46345463754 0 83.462500825 0.4374991749999936 @@ -9479,7 +9479,7 @@ conf109 3.46345463754 0 83.462500825 0.4374991749999936 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf110 3.556746151 0 83.424999425 0.47500057500000425 @@ -9566,7 +9566,7 @@ conf110 3.556746151 0 83.424999425 0.47500057500000425 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf111 3.96049527585 0 83.53333295 0.3666670499999981 @@ -9653,7 +9653,7 @@ conf111 3.96049527585 0 83.53333295 0.3666670499999981 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf112 3.9715961288 0 83.32833325 0.57166675 @@ -9740,7 +9740,7 @@ conf112 3.9715961288 0 83.32833325 0.57166675 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf113 3.93130152041 0 82.56666695 1.3999995750000025 @@ -9827,7 +9827,7 @@ conf113 3.93130152041 0 82.56666695 1.3999995750000025 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf114 2.8668123125 0 82.998333125 0.7525003124999969 @@ -9914,7 +9914,7 @@ conf114 2.8668123125 0 82.998333125 0.7525003124999969 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf115 2.64845545339 0 82.965000875 0.8024986874999982 @@ -10001,7 +10001,7 @@ conf115 2.64845545339 0 82.965000875 0.8024986874999982 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf116 3.14597582271 0 82.849167475 0.9762487874999977 @@ -10088,7 +10088,7 @@ conf116 3.14597582271 0 82.849167475 0.9762487874999977 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf117 2.72482912735 0 83.4741666 0.42583339999999625 @@ -10175,7 +10175,7 @@ conf117 2.72482912735 0 83.4741666 0.42583339999999625 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf118 3.95103617451 0 82.3491667 1.7262499500000033 @@ -10262,7 +10262,7 @@ conf118 3.95103617451 0 82.3491667 1.7262499500000033 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf119 3.00588110745 0 83.02166615 0.7175007749999978 @@ -10349,7 +10349,7 @@ conf119 3.00588110745 0 83.02166615 0.7175007749999978 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf120 3.28000196808 0 82.762500375 1.1062494374999972 @@ -10436,7 +10436,7 @@ conf120 3.28000196808 0 82.762500375 1.1062494374999972 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf121 3.47173739276 0 82.802499825 1.0462502625000027 @@ -10523,7 +10523,7 @@ conf121 3.47173739276 0 82.802499825 1.0462502625000027 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf122 2.61621549789 0 83.197500425 0.4537493624999982 @@ -10610,7 +10610,7 @@ conf122 2.61621549789 0 83.197500425 0.4537493624999982 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf123 2.95549421538 0 83.010000475 0.7349992875000027 @@ -10697,7 +10697,7 @@ conf123 2.95549421538 0 83.010000475 0.7349992875000027 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf124 3.88669230643 0 82.45333265 1.5700010250000105 @@ -10784,7 +10784,7 @@ conf124 3.88669230643 0 82.45333265 1.5700010250000105 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf125 2.83364863532 0 82.581667225 1.3774991624999942 @@ -10871,7 +10871,7 @@ conf125 2.83364863532 0 82.581667225 1.3774991624999942 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf126 3.08002863884 0 82.504999925 1.4925001124999895 @@ -10958,7 +10958,7 @@ conf126 3.08002863884 0 82.504999925 1.4925001124999895 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf127 2.00048974491 0 82.50666635 1.490000474999995 @@ -11044,8 +11044,8 @@ conf127 2.00048974491 0 82.50666635 1.490000474999995 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf128 2.73765608296 0 82.255832625 1.8662510625000053 @@ -11132,7 +11132,7 @@ conf128 2.73765608296 0 82.255832625 1.8662510625000053 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf129 4.06487927094 0 83.13333375 0.5499993749999916 @@ -11219,7 +11219,7 @@ conf129 4.06487927094 0 83.13333375 0.5499993749999916 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf130 4.09684344986 0 83.003333475 0.7449997874999923 @@ -11306,7 +11306,7 @@ conf130 4.09684344986 0 83.003333475 0.7449997874999923 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf131 3.21849911232 0 82.29583385 1.806249225000009 @@ -11393,7 +11393,7 @@ conf131 3.21849911232 0 82.29583385 1.806249225000009 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf132 2.04860322208 0 82.465833275 1.5512500875000086 @@ -11480,7 +11480,7 @@ conf132 2.04860322208 0 82.465833275 1.5512500875000086 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf133 3.80144895722 0 82.52833315 1.4575002750000081 @@ -11567,7 +11567,7 @@ conf133 3.80144895722 0 82.52833315 1.4575002750000081 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf134 2.56619518427 0 82.785834275 1.0712485875000013 @@ -11653,8 +11653,8 @@ conf134 2.56619518427 0 82.785834275 1.0712485875000013 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf135 3.62695395201 0 82.454165975 1.5687510375000073 @@ -11741,7 +11741,7 @@ conf135 3.62695395201 0 82.454165975 1.5687510375000073 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf136 3.89727045934 0 82.97500015 0.7874997750000006 @@ -11828,7 +11828,7 @@ conf136 3.89727045934 0 82.97500015 0.7874997750000006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf137 4.12276718448 0 82.457499725 1.5637504124999921 @@ -11915,7 +11915,7 @@ conf137 4.12276718448 0 82.457499725 1.5637504124999921 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf138 3.60700810131 0 82.545832825 1.4312507624999924 @@ -12002,7 +12002,7 @@ conf138 3.60700810131 0 82.545832825 1.4312507624999924 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf139 4.0205364833 0 81.89083385 2.4137492249999895 @@ -12089,7 +12089,7 @@ conf139 4.0205364833 0 81.89083385 2.4137492249999895 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf140 4.11840212461 0 81.8091669 2.536249650000009 @@ -12176,7 +12176,7 @@ conf140 4.11840212461 0 81.8091669 2.536249650000009 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf141 3.01049479281 0 82.019999875 2.2200001875000055 @@ -12263,7 +12263,7 @@ conf141 3.01049479281 0 82.019999875 2.2200001875000055 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf142 3.85335942385 0 82.0483325 2.177501249999999 @@ -12350,7 +12350,7 @@ conf142 3.85335942385 0 82.0483325 2.177501249999999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf143 2.5026299742 0 81.625833925 2.8112491125000076 @@ -12437,7 +12437,7 @@ conf143 2.5026299742 0 81.625833925 2.8112491125000076 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf144 3.43886954105 0 82.726666675 1.1599999874999938 @@ -12524,7 +12524,7 @@ conf144 3.43886954105 0 82.726666675 1.1599999874999938 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf145 3.73017005141 0 82.89583355 0.9062496749999909 @@ -12611,7 +12611,7 @@ conf145 3.73017005141 0 82.89583355 0.9062496749999909 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf146 3.15732515345 0 81.805000075 2.542499887500007 @@ -12698,7 +12698,7 @@ conf146 3.15732515345 0 81.805000075 2.542499887500007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf147 3.98632041312 0 82.07166655 2.142500174999995 @@ -12785,7 +12785,7 @@ conf147 3.98632041312 0 82.07166655 2.142500174999995 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf148 3.80024443647 0 82.9274992 0.8587512000000004 @@ -12872,7 +12872,7 @@ conf148 3.80024443647 0 82.9274992 0.8587512000000004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf149 4.08467265051 0 82.004166025 2.2437509624999947 @@ -12959,7 +12959,7 @@ conf149 4.08467265051 0 82.004166025 2.2437509624999947 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf150 4.02990448369 0 82.30916665 1.7862500250000082 @@ -13046,7 +13046,7 @@ conf150 4.02990448369 0 82.30916665 1.7862500250000082 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf151 3.90355228103 0 81.8025006 2.546249099999997 @@ -13133,7 +13133,7 @@ conf151 3.90355228103 0 81.8025006 2.546249099999997 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf152 3.94843898601 0 81.8916664 2.412500399999992 @@ -13220,7 +13220,7 @@ conf152 3.94843898601 0 81.8916664 2.412500399999992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf153 4.12751348406 0 81.888333875 2.417499187499999 @@ -13307,7 +13307,7 @@ conf153 4.12751348406 0 81.888333875 2.417499187499999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf154 4.02515780566 0 81.9924993 2.2612510499999914 @@ -13394,7 +13394,7 @@ conf154 4.02515780566 0 81.9924993 2.2612510499999914 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf155 3.83482666749 0 82.1449991 2.0325013499999898 @@ -13481,7 +13481,7 @@ conf155 3.83482666749 0 82.1449991 2.0325013499999898 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf156 3.8517164764 0 82.2041664 1.9437503999999919 @@ -13568,7 +13568,7 @@ conf156 3.8517164764 0 82.2041664 1.9437503999999919 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf157 4.10598132256 0 81.954167525 2.318748712499996 @@ -13655,7 +13655,7 @@ conf157 4.10598132256 0 81.954167525 2.318748712499996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf158 3.31478397356 0 81.75083395 2.623749074999999 @@ -13742,7 +13742,7 @@ conf158 3.31478397356 0 81.75083395 2.623749074999999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf159 3.96623260541 0 82.226666875 1.9099996874999903 @@ -13829,7 +13829,7 @@ conf159 3.96623260541 0 82.226666875 1.9099996874999903 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf160 4.04782353886 0 82.049166175 2.1762507375000055 @@ -13916,7 +13916,7 @@ conf160 4.04782353886 0 82.049166175 2.1762507375000055 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf161 4.02858926028 0 81.793333275 2.5600000875000077 @@ -14003,7 +14003,7 @@ conf161 4.02858926028 0 81.793333275 2.5600000875000077 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf162 3.00292158132 0 81.6974989 2.703751650000001 @@ -14090,7 +14090,7 @@ conf162 3.00292158132 0 81.6974989 2.703751650000001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf163 3.29748236022 0 82.1050001 2.092499850000003 @@ -14177,7 +14177,7 @@ conf163 3.29748236022 0 82.1050001 2.092499850000003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf164 4.0362576555 0 82.2466675 1.8799987499999986 @@ -14264,7 +14264,7 @@ conf164 4.0362576555 0 82.2466675 1.8799987499999986 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf165 3.05446537337 0 82.1716669 1.992499649999992 @@ -14351,7 +14351,7 @@ conf165 3.05446537337 0 82.1716669 1.992499649999992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf166 3.9071658065 0 82.190832875 1.9637506875000028 @@ -14438,7 +14438,7 @@ conf166 3.9071658065 0 82.190832875 1.9637506875000028 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf167 3.93287113327 0 82.12666665 2.0600000249999937 @@ -14525,7 +14525,7 @@ conf167 3.93287113327 0 82.12666665 2.0600000249999937 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf168 4.04478954767 0 81.856666625 2.465000062499996 @@ -14612,7 +14612,7 @@ conf168 4.04478954767 0 81.856666625 2.465000062499996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf169 3.61558117477 0 81.9016671 2.397499350000004 @@ -14699,7 +14699,7 @@ conf169 3.61558117477 0 81.9016671 2.397499350000004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf170 4.30825679247 0 81.851666825 2.472499762500007 @@ -14786,7 +14786,7 @@ conf170 4.30825679247 0 81.851666825 2.472499762500007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf171 3.69363094091 0 82.946667475 0.8299987875000028 @@ -14873,7 +14873,7 @@ conf171 3.69363094091 0 82.946667475 0.8299987875000028 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf172 2.77993236963 0 82.4766672 1.5349992000000086 @@ -14960,7 +14960,7 @@ conf172 2.77993236963 0 82.4766672 1.5349992000000086 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf173 2.54145510026 0 82.7925007 1.0612489499999924 @@ -15046,8 +15046,8 @@ conf173 2.54145510026 0 82.7925007 1.0612489499999924 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf174 3.87860542119 0 82.008333625 2.237499562499991 @@ -15134,7 +15134,7 @@ conf174 3.87860542119 0 82.008333625 2.237499562499991 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf175 3.56868994119 0 82.02499975 2.2125003749999905 @@ -15221,7 +15221,7 @@ conf175 3.56868994119 0 82.02499975 2.2125003749999905 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf176 2.23770347257 0 83.0258332 0.7112502000000092 @@ -15307,8 +15307,8 @@ conf176 2.23770347257 0 83.0258332 0.7112502000000092 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf177 2.49861746763 0 82.984166525 0.7737502124999907 @@ -15395,7 +15395,7 @@ conf177 2.49861746763 0 82.984166525 0.7737502124999907 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf178 2.02993947881 0 83.30250015 0.5974998499999998 @@ -15481,8 +15481,8 @@ conf178 2.02993947881 0 83.30250015 0.5974998499999998 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf179 2.20017225716 0 83.1766651 0.48500235000000913 @@ -15568,8 +15568,8 @@ conf179 2.20017225716 0 83.1766651 0.48500235000000913 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf180 2.71551751125 0 83.228333425 0.6716665749999976 @@ -15656,7 +15656,7 @@ conf180 2.71551751125 0 83.228333425 0.6716665749999976 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf181 2.06184549766 0 83.031666575 0.702500137499996 @@ -15742,8 +15742,8 @@ conf181 2.06184549766 0 83.031666575 0.702500137499996 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf182 2.77617639439 0 82.328333125 1.7575003124999995 @@ -15829,8 +15829,8 @@ conf182 2.77617639439 0 82.328333125 1.7575003124999995 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf183 3.8694276968 0 82.7800006 1.079999100000009 @@ -15917,7 +15917,7 @@ conf183 3.8694276968 0 82.7800006 1.079999100000009 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf184 2.34590457627 0 82.2816663 1.8275005500000034 @@ -16003,8 +16003,8 @@ conf184 2.34590457627 0 82.2816663 1.8275005500000034 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf185 4.07407440381 0 82.244165975 1.883751037499998 @@ -16091,7 +16091,7 @@ conf185 4.07407440381 0 82.244165975 1.883751037499998 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf186 3.11780856309 0 82.428332975 1.6075005374999947 @@ -16178,7 +16178,7 @@ conf186 3.11780856309 0 82.428332975 1.6075005374999947 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf187 3.58558835651 0 82.3399998 1.7400002999999984 @@ -16265,7 +16265,7 @@ conf187 3.58558835651 0 82.3399998 1.7400002999999984 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf188 3.85234242953 0 82.34583265 1.7312510249999917 @@ -16352,7 +16352,7 @@ conf188 3.85234242953 0 82.34583265 1.7312510249999917 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf189 2.7074193437 0 82.325833575 1.7612496374999935 @@ -16439,7 +16439,7 @@ conf189 2.7074193437 0 82.325833575 1.7612496374999935 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf190 4.05895393605 0 82.3066669 1.7899996500000057 @@ -16526,7 +16526,7 @@ conf190 4.05895393605 0 82.3066669 1.7899996500000057 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf191 3.78103563563 0 82.3833346 1.6749981000000034 @@ -16613,7 +16613,7 @@ conf191 3.78103563563 0 82.3833346 1.6749981000000034 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf192 3.7929361233 0 82.3158331 1.7762503499999909 @@ -16700,7 +16700,7 @@ conf192 3.7929361233 0 82.3158331 1.7762503499999909 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf193 2.97917012062 0 82.399166375 1.6512504375000105 @@ -16786,8 +16786,8 @@ conf193 2.97917012062 0 82.399166375 1.6512504375000105 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf194 3.70896846547 0 82.84583335 0.9812499749999901 @@ -16874,7 +16874,7 @@ conf194 3.70896846547 0 82.84583335 0.9812499749999901 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf195 3.05031465583 0 82.129166025 2.0562509624999947 @@ -16961,7 +16961,7 @@ conf195 3.05031465583 0 82.129166025 2.0562509624999947 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf196 4.76953621711 0 81.905 2.3924999999999983 @@ -17048,7 +17048,7 @@ conf196 4.76953621711 0 81.905 2.3924999999999983 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf197 4.82068705485 0 82.02250055 2.2162491749999944 @@ -17135,7 +17135,7 @@ conf197 4.82068705485 0 82.02250055 2.2162491749999944 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf198 3.40685460008 0 82.039999825 2.190000262500007 @@ -17222,7 +17222,7 @@ conf198 3.40685460008 0 82.039999825 2.190000262500007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf199 3.7406185613 0 82.309166475 1.7862502875000033 @@ -17309,7 +17309,7 @@ conf199 3.7406185613 0 82.309166475 1.7862502875000033 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf200 3.09685498241 0 82.1758326 1.9862510999999898 @@ -17396,7 +17396,7 @@ conf200 3.09685498241 0 82.1758326 1.9862510999999898 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf201 3.23081977958 0 82.9983336 0.7524996000000073 @@ -17483,7 +17483,7 @@ conf201 3.23081977958 0 82.9983336 0.7524996000000073 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf202 3.96567454672 0 82.4983326 1.5025011000000035 @@ -17570,7 +17570,7 @@ conf202 3.96567454672 0 82.4983326 1.5025011000000035 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf203 4.59326226068 0 82.658333425 1.2624998625000075 @@ -17657,7 +17657,7 @@ conf203 4.59326226068 0 82.658333425 1.2624998625000075 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf204 4.03800709024 0 82.6316668 1.3024997999999925 @@ -17744,7 +17744,7 @@ conf204 4.03800709024 0 82.6316668 1.3024997999999925 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf205 3.42928358185 0 82.031665675 2.202501487500001 @@ -17831,7 +17831,7 @@ conf205 3.42928358185 0 82.031665675 2.202501487500001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf206 3.32221289747 0 82.153333675 2.0199994875000016 @@ -17918,7 +17918,7 @@ conf206 3.32221289747 0 82.153333675 2.0199994875000016 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf207 5.66794988438 0 81.89250005 2.411249925000007 @@ -18005,7 +18005,7 @@ conf207 5.66794988438 0 81.89250005 2.411249925000007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf208 2.71874001219 0 82.497499925 1.5037501125000006 @@ -18092,7 +18092,7 @@ conf208 2.71874001219 0 82.497499925 1.5037501125000006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf209 3.03474416486 0 82.05833335 2.162499975000003 @@ -18179,7 +18179,7 @@ conf209 3.03474416486 0 82.05833335 2.162499975000003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf210 3.88176217612 0 82.449999725 1.5750004125000032 @@ -18266,7 +18266,7 @@ conf210 3.88176217612 0 82.449999725 1.5750004125000032 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf211 3.54762785706 0 83.89833405 0.0016659499999974736 @@ -18353,7 +18353,7 @@ conf211 3.54762785706 0 83.89833405 0.0016659499999974736 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf212 3.05039482856 0 82.7850004 1.072499399999998 @@ -18440,7 +18440,7 @@ conf212 3.05039482856 0 82.7850004 1.072499399999998 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf213 2.82821126308 0 82.680833325 1.228750012500008 @@ -18527,7 +18527,7 @@ conf213 2.82821126308 0 82.680833325 1.228750012500008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf214 3.81477730635 0 82.135832925 2.046250612499996 @@ -18614,7 +18614,7 @@ conf214 3.81477730635 0 82.135832925 2.046250612499996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf215 3.39722289075 0 83.580000125 0.31999987500000204 @@ -18701,7 +18701,7 @@ conf215 3.39722289075 0 83.580000125 0.31999987500000204 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf216 3.89111320826 0 82.1216669 2.067499650000009 @@ -18788,7 +18788,7 @@ conf216 3.89111320826 0 82.1216669 2.067499650000009 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf217 3.50253383593 0 82.2108337 1.9337494500000076 @@ -18875,7 +18875,7 @@ conf217 3.50253383593 0 82.2108337 1.9337494500000076 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf218 4.05813557147 0 82.50000075 1.4999988750000028 @@ -18962,7 +18962,7 @@ conf218 4.05813557147 0 82.50000075 1.4999988750000028 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf219 3.26770542063 0 83.100000825 0.5999987625000074 @@ -19049,7 +19049,7 @@ conf219 3.26770542063 0 83.100000825 0.5999987625000074 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf220 3.18828477511 0 82.7100005 1.18499924999999 @@ -19136,7 +19136,7 @@ conf220 3.18828477511 0 82.7100005 1.18499924999999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf221 2.71225509774 0 82.55166665 1.422500024999998 @@ -19223,7 +19223,7 @@ conf221 2.71225509774 0 82.55166665 1.422500024999998 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf222 3.87817450174 0 82.5324995 1.4512507499999998 @@ -19310,7 +19310,7 @@ conf222 3.87817450174 0 82.5324995 1.4512507499999998 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf223 4.03339162129 0 82.4558336 1.566249599999992 @@ -19397,7 +19397,7 @@ conf223 4.03339162129 0 82.4558336 1.566249599999992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf224 3.91264706835 0 82.13083465 2.0537480250000044 @@ -19484,7 +19484,7 @@ conf224 3.91264706835 0 82.13083465 2.0537480250000044 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf225 3.06837262281 0 82.53416635 1.44875047499999 @@ -19571,7 +19571,7 @@ conf225 3.06837262281 0 82.53416635 1.44875047499999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf226 3.69764932636 0 81.918333375 2.3724999374999953 @@ -19658,7 +19658,7 @@ conf226 3.69764932636 0 81.918333375 2.3724999374999953 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf227 2.43486575166 0 82.865833525 0.951249712500001 @@ -19744,8 +19744,8 @@ conf227 2.43486575166 0 82.865833525 0.951249712500001 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf228 2.32359233423 0 82.09833355 2.10249967499999 @@ -19832,7 +19832,7 @@ conf228 2.32359233423 0 82.09833355 2.10249967499999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf229 3.42928358185 0 82.219165975 1.9212510375000065 @@ -19919,7 +19919,7 @@ conf229 3.42928358185 0 82.219165975 1.9212510375000065 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf230 3.55619894808 0 81.826667025 2.509999462499991 @@ -20006,7 +20006,7 @@ conf230 3.55619894808 0 81.826667025 2.509999462499991 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf231 2.86694058251 0 82.870833375 0.9437499374999945 @@ -20093,7 +20093,7 @@ conf231 2.86694058251 0 82.870833375 0.9437499374999945 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf232 4.88375274604 0 81.896667125 2.40499931250001 @@ -20180,7 +20180,7 @@ conf232 4.88375274604 0 81.896667125 2.40499931250001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf233 3.28938021997 0 82.0241669 2.213749650000004 @@ -20267,7 +20267,7 @@ conf233 3.28938021997 0 82.0241669 2.213749650000004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf234 4.39309472823 0 81.828332925 2.507500612500003 @@ -20354,7 +20354,7 @@ conf234 4.39309472823 0 81.828332925 2.507500612500003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf235 3.79357213589 0 83.575833175 0.3241668250000004 @@ -20441,7 +20441,7 @@ conf235 3.79357213589 0 83.575833175 0.3241668250000004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf236 2.12486885207 0 83.46416625 0.4358337499999948 @@ -20528,7 +20528,7 @@ conf236 2.12486885207 0 83.46416625 0.4358337499999948 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf237 3.95836341588 0 83.341666975 0.5583330250000046 @@ -20615,7 +20615,7 @@ conf237 3.95836341588 0 83.341666975 0.5583330250000046 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf238 2.25904057642 0 83.567499 0.33250100000000204 @@ -20702,7 +20702,7 @@ conf238 2.25904057642 0 83.567499 0.33250100000000204 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf239 3.39599428853 0 83.3233327 0.5766673000000054 @@ -20789,7 +20789,7 @@ conf239 3.39599428853 0 83.3233327 0.5766673000000054 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf240 2.27086134134 0 83.0875002 0.6187497000000093 @@ -20876,7 +20876,7 @@ conf240 2.27086134134 0 83.0875002 0.6187497000000093 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf241 2.25812915866 0 83.616666775 0.28333322500000124 @@ -20963,7 +20963,7 @@ conf241 2.25812915866 0 83.616666775 0.28333322500000124 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf242 2.12981011418 0 83.4783326 0.42166739999999836 @@ -21050,7 +21050,7 @@ conf242 2.12981011418 0 83.4783326 0.42166739999999836 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf243 2.57878675932 0 83.4525004 0.4474995999999948 @@ -21137,7 +21137,7 @@ conf243 2.57878675932 0 83.4525004 0.4474995999999948 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf244 2.61999068304 0 83.43166675 0.4683332499999949 @@ -21224,7 +21224,7 @@ conf244 2.61999068304 0 83.43166675 0.4683332499999949 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf245 2.62385834639 0 83.3758336 0.5241663999999929 @@ -21311,7 +21311,7 @@ conf245 2.62385834639 0 83.3758336 0.5241663999999929 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf246 3.78695562862 0 83.406667325 0.4933326749999992 @@ -21398,7 +21398,7 @@ conf246 3.78695562862 0 83.406667325 0.4933326749999992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf247 3.25056012417 0 83.410000075 0.4899999250000008 @@ -21485,7 +21485,7 @@ conf247 3.25056012417 0 83.410000075 0.4899999250000008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf248 2.64116522688 0 83.459999775 0.440000225 @@ -21571,8 +21571,8 @@ conf248 2.64116522688 0 83.459999775 0.440000225 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf249 3.87266738703 0 83.2074999 0.6925000999999981 @@ -21659,7 +21659,7 @@ conf249 3.87266738703 0 83.2074999 0.6925000999999981 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf250 3.1762968602 0 83.257500125 0.642499874999993 @@ -21746,7 +21746,7 @@ conf250 3.1762968602 0 83.257500125 0.642499874999993 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf251 2.60635725011 0 83.481666275 0.4183337250000051 @@ -21833,7 +21833,7 @@ conf251 2.60635725011 0 83.481666275 0.4183337250000051 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf252 2.25521755755 0 83.1966667 0.45499995000000837 @@ -21920,7 +21920,7 @@ conf252 2.25521755755 0 83.1966667 0.45499995000000837 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf253 3.2990666889 0 83.4449997 0.45500030000000324 @@ -22007,7 +22007,7 @@ conf253 3.2990666889 0 83.4449997 0.45500030000000324 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf254 2.87427851974 0 83.0825007 0.6262489500000044 @@ -22094,7 +22094,7 @@ conf254 2.87427851974 0 83.0825007 0.6262489500000044 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf255 2.55397266535 0 83.6124996 0.28750039999999333 @@ -22181,7 +22181,7 @@ conf255 2.55397266535 0 83.6124996 0.28750039999999333 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf256 3.90339685542 0 83.2258328 0.6741671999999937 @@ -22268,7 +22268,7 @@ conf256 3.90339685542 0 83.2258328 0.6741671999999937 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf257 3.3158753237 0 83.366665625 0.533334375000004 @@ -22355,7 +22355,7 @@ conf257 3.3158753237 0 83.366665625 0.533334375000004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf258 2.71891142175 0 83.264168025 0.6358319749999964 @@ -22442,7 +22442,7 @@ conf258 2.71891142175 0 83.264168025 0.6358319749999964 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf259 2.07975389368 0 83.454165825 0.445834174999996 @@ -22529,7 +22529,7 @@ conf259 2.07975389368 0 83.454165825 0.445834174999996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf260 2.7013324964 0 83.281666425 0.6183335750000026 @@ -22615,8 +22615,8 @@ conf260 2.7013324964 0 83.281666425 0.6183335750000026 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf261 2.27247670758 0 83.3800005 0.5199995000000058 @@ -22703,7 +22703,7 @@ conf261 2.27247670758 0 83.3800005 0.5199995000000058 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf262 3.79597347164 0 83.321666575 0.5783334250000053 @@ -22790,7 +22790,7 @@ conf262 3.79597347164 0 83.321666575 0.5783334250000053 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf263 2.98573825971 0 83.297500075 0.6024999249999979 @@ -22877,7 +22877,7 @@ conf263 2.98573825971 0 83.297500075 0.6024999249999979 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf264 2.6496606385 0 83.2325002 0.667499799999996 @@ -22964,7 +22964,7 @@ conf264 2.6496606385 0 83.2325002 0.667499799999996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf265 3.60943759784 0 83.4424997 0.45750030000000097 @@ -23051,7 +23051,7 @@ conf265 3.60943759784 0 83.4424997 0.45750030000000097 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf266 2.9593393986 0 83.5075001 0.39249989999999857 @@ -23138,7 +23138,7 @@ conf266 2.9593393986 0 83.5075001 0.39249989999999857 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf267 3.41172685129 0 83.2766666 0.6233334000000014 @@ -23225,7 +23225,7 @@ conf267 3.41172685129 0 83.2766666 0.6233334000000014 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf268 2.38589860172 0 83.311666475 0.5883335250000045 @@ -23312,7 +23312,7 @@ conf268 2.38589860172 0 83.311666475 0.5883335250000045 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf269 2.12944547394 0 83.467499575 0.43250042499999497 @@ -23399,7 +23399,7 @@ conf269 2.12944547394 0 83.467499575 0.43250042499999497 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf270 3.20776362372 0 83.172499575 0.491250637499995 @@ -23486,7 +23486,7 @@ conf270 3.20776362372 0 83.172499575 0.491250637499995 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf271 3.95396064036 0 83.240833525 0.6591664750000007 @@ -23573,7 +23573,7 @@ conf271 3.95396064036 0 83.240833525 0.6591664750000007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf272 3.92038291833 0 82.4716655 1.5425017499999996 @@ -23660,7 +23660,7 @@ conf272 3.92038291833 0 82.4716655 1.5425017499999996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf273 2.55641477625 0 83.0358341 0.6962488499999964 @@ -23746,8 +23746,8 @@ conf273 2.55641477625 0 83.0358341 0.6962488499999964 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf274 3.38281248672 0 82.490833825 1.5137492625000064 @@ -23834,7 +23834,7 @@ conf274 3.38281248672 0 82.490833825 1.5137492625000064 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf275 3.26886223339 0 82.390833125 1.6637503124999995 @@ -23921,7 +23921,7 @@ conf275 3.26886223339 0 82.390833125 1.6637503124999995 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf276 2.88984083604 0 83.253333025 0.6466669749999966 @@ -24008,7 +24008,7 @@ conf276 2.88984083604 0 83.253333025 0.6466669749999966 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf277 2.55175275146 0 82.50333375 1.4949993750000061 @@ -24094,8 +24094,8 @@ conf277 2.55175275146 0 82.50333375 1.4949993750000061 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf278 2.62376471646 0 82.295832175 1.8062517374999985 @@ -24182,7 +24182,7 @@ conf278 2.62376471646 0 82.295832175 1.8062517374999985 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf279 2.89529880947 0 82.244166875 1.8837496874999928 @@ -24269,7 +24269,7 @@ conf279 2.89529880947 0 82.244166875 1.8837496874999928 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf280 3.20641600424 0 82.4850002 1.5224996999999973 @@ -24356,7 +24356,7 @@ conf280 3.20641600424 0 82.4850002 1.5224996999999973 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf281 3.35812188872 0 82.4258341 1.6112488499999955 @@ -24443,7 +24443,7 @@ conf281 3.35812188872 0 82.4258341 1.6112488499999955 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf282 2.49677940941 0 82.71750005 1.1737499250000027 @@ -24530,7 +24530,7 @@ conf282 2.49677940941 0 82.71750005 1.1737499250000027 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf283 3.38120370248 0 82.390834075 1.6637488874999988 @@ -24617,7 +24617,7 @@ conf283 3.38120370248 0 82.390834075 1.6637488874999988 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf284 2.61676165298 0 82.824166125 1.0137508124999925 @@ -24704,7 +24704,7 @@ conf284 2.61676165298 0 82.824166125 1.0137508124999925 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf285 2.95471428151 0 83.240000175 0.6599998249999942 @@ -24791,7 +24791,7 @@ conf285 2.95471428151 0 83.240000175 0.6599998249999942 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf286 3.16800384081 0 82.459167225 1.5612491624999976 @@ -24878,7 +24878,7 @@ conf286 3.16800384081 0 82.459167225 1.5612491624999976 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf287 3.29072931971 0 82.5541669 1.4187496500000023 @@ -24965,7 +24965,7 @@ conf287 3.29072931971 0 82.5541669 1.4187496500000023 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf288 3.86883458805 0 82.56500035 1.402499474999992 @@ -25052,7 +25052,7 @@ conf288 3.86883458805 0 82.56500035 1.402499474999992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf289 3.53914203218 0 83.42500025 0.4749997500000035 @@ -25139,7 +25139,7 @@ conf289 3.53914203218 0 83.42500025 0.4749997500000035 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf290 2.88656198137 0 83.3333337 0.5666663000000028 @@ -25226,7 +25226,7 @@ conf290 2.88656198137 0 83.3333337 0.5666663000000028 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf291 2.68828782884 0 82.304166225 1.7937506624999955 @@ -25313,7 +25313,7 @@ conf291 2.68828782884 0 82.304166225 1.7937506624999955 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf292 2.22188270191 0 83.49166645 0.40833355000000326 @@ -25399,8 +25399,8 @@ conf292 2.22188270191 0 83.49166645 0.40833355000000326 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf293 3.10276702124 0 82.691666975 1.212499537499994 @@ -25487,7 +25487,7 @@ conf293 3.10276702124 0 82.691666975 1.212499537499994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf294 3.71861561279 0 81.845 2.4825000000000017 @@ -25574,7 +25574,7 @@ conf294 3.71861561279 0 81.845 2.4825000000000017 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf295 2.31264838619 0 81.90916705 2.3862494250000097 @@ -25660,8 +25660,8 @@ conf295 2.31264838619 0 81.90916705 2.3862494250000097 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf296 4.65131508141 0 81.721666225 2.6675006624999895 @@ -25748,7 +25748,7 @@ conf296 4.65131508141 0 81.721666225 2.6675006624999895 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf297 4.41041653795 0 81.8041672 2.5437492000000077 @@ -25835,7 +25835,7 @@ conf297 4.41041653795 0 81.8041672 2.5437492000000077 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf298 4.64777037234 0 81.7791666 2.5812501000000054 @@ -25922,7 +25922,7 @@ conf298 4.64777037234 0 81.7791666 2.5812501000000054 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf299 2.54362112272 0 81.86583315 2.4512502749999996 @@ -26009,7 +26009,7 @@ conf299 2.54362112272 0 81.86583315 2.4512502749999996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf300 3.9092714242 0 81.80166725 2.5474991250000087 @@ -26096,7 +26096,7 @@ conf300 3.9092714242 0 81.80166725 2.5474991250000087 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf301 4.71458655761 0 81.76500005 2.6024999250000036 @@ -26183,7 +26183,7 @@ conf301 4.71458655761 0 81.76500005 2.6024999250000036 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf302 2.99958912291 0 82.183333025 1.975000462500006 @@ -26270,7 +26270,7 @@ conf302 2.99958912291 0 82.183333025 1.975000462500006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf303 4.15215580161 0 81.7650006 2.60249910000001 @@ -26357,7 +26357,7 @@ conf303 4.15215580161 0 81.7650006 2.60249910000001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf304 3.08724297596 0 82.7725004 1.0912494000000024 @@ -26444,7 +26444,7 @@ conf304 3.08724297596 0 82.7725004 1.0912494000000024 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf305 4.55102318011 0 81.726667575 2.65999863750001 @@ -26531,7 +26531,7 @@ conf305 4.55102318011 0 81.726667575 2.65999863750001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf306 4.05237118816 0 81.7183329 2.6725006500000106 @@ -26618,7 +26618,7 @@ conf306 4.05237118816 0 81.7183329 2.6725006500000106 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf307 3.65495824565 0 81.864166075 2.4537508875 @@ -26705,7 +26705,7 @@ conf307 3.65495824565 0 81.864166075 2.4537508875 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf308 3.58660356691 0 83.1533337 0.5199994499999931 @@ -26792,7 +26792,7 @@ conf308 3.58660356691 0 83.1533337 0.5199994499999931 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf309 4.15683977316 0 81.81250045 2.5312493249999974 @@ -26879,7 +26879,7 @@ conf309 4.15683977316 0 81.81250045 2.5312493249999974 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf310 2.2957876366 0 81.892500625 2.411249062500005 @@ -26966,7 +26966,7 @@ conf310 2.2957876366 0 81.892500625 2.411249062500005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf311 4.06936164451 0 81.83166655 2.5025001750000087 @@ -27053,7 +27053,7 @@ conf311 4.06936164451 0 81.83166655 2.5025001750000087 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf312 4.16443452935 0 81.8549995 2.467500749999992 @@ -27140,7 +27140,7 @@ conf312 4.16443452935 0 81.8549995 2.467500749999992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf313 4.12196594403 0 81.826667225 2.5099991625000087 @@ -27227,7 +27227,7 @@ conf313 4.12196594403 0 81.826667225 2.5099991625000087 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf314 2.80762145408 0 81.9374994 2.3437509000000105 @@ -27314,7 +27314,7 @@ conf314 2.80762145408 0 81.9374994 2.3437509000000105 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf315 3.77961048884 0 82.096666975 2.1049995374999924 @@ -27401,7 +27401,7 @@ conf315 3.77961048884 0 82.096666975 2.1049995374999924 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf316 3.8515884894 0 81.816667425 2.5249988624999915 @@ -27488,7 +27488,7 @@ conf316 3.8515884894 0 81.816667425 2.5249988624999915 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf317 4.19486603401 0 81.712499225 2.681251162499997 @@ -27575,7 +27575,7 @@ conf317 4.19486603401 0 81.712499225 2.681251162499997 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf318 4.0553073343 0 81.748333075 2.6275003874999925 @@ -27662,7 +27662,7 @@ conf318 4.0553073343 0 81.748333075 2.6275003874999925 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf319 3.4554349638 0 83.5124993 0.38750069999999825 @@ -27749,7 +27749,7 @@ conf319 3.4554349638 0 83.5124993 0.38750069999999825 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf320 4.47851561355 0 81.7700007 2.5949989500000044 @@ -27836,7 +27836,7 @@ conf320 4.47851561355 0 81.7700007 2.5949989500000044 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf321 3.5209555481 0 83.0466657 0.6800014499999918 @@ -27923,7 +27923,7 @@ conf321 3.5209555481 0 83.0466657 0.6800014499999918 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf322 4.66740793088 0 82.42833425 1.607498624999991 @@ -28010,7 +28010,7 @@ conf322 4.66740793088 0 82.42833425 1.607498624999991 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf323 3.0015015591 0 83.6899995 0.21000050000000103 @@ -28097,7 +28097,7 @@ conf323 3.0015015591 0 83.6899995 0.21000050000000103 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf324 2.47501547765 0 83.31749875 0.5825012500000014 @@ -28183,8 +28183,8 @@ conf324 2.47501547765 0 83.31749875 0.5825012500000014 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf325 3.5212084815 0 83.172499875 0.4912501875000004 @@ -28271,7 +28271,7 @@ conf325 3.5212084815 0 83.172499875 0.4912501875000004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf326 3.50606233828 0 83.2108336 0.6891663999999992 @@ -28358,7 +28358,7 @@ conf326 3.50606233828 0 83.2108336 0.6891663999999992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf327 2.92583901753 0 83.28500025 0.6149997500000041 @@ -28445,7 +28445,7 @@ conf327 2.92583901753 0 83.28500025 0.6149997500000041 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf328 3.40109920082 0 83.1758331 0.4862503499999917 @@ -28532,7 +28532,7 @@ conf328 3.40109920082 0 83.1758331 0.4862503499999917 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf329 3.56759398638 0 83.21000045 0.6899995500000046 @@ -28619,7 +28619,7 @@ conf329 3.56759398638 0 83.21000045 0.6899995500000046 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf330 3.55806857582 0 83.169166525 0.49625021250000856 @@ -28706,7 +28706,7 @@ conf330 3.55806857582 0 83.169166525 0.49625021250000856 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf331 3.43688168131 0 83.131667175 0.552499237499994 @@ -28793,7 +28793,7 @@ conf331 3.43688168131 0 83.131667175 0.552499237499994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf332 2.12603509822 0 83.31166615 0.5883338500000065 @@ -28879,8 +28879,8 @@ conf332 2.12603509822 0 83.31166615 0.5883338500000065 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf333 3.34049231646 0 83.560832775 0.3391672250000056 @@ -28967,7 +28967,7 @@ conf333 3.34049231646 0 83.560832775 0.3391672250000056 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf334 3.1381527329 0 83.294165025 0.6058349750000019 @@ -29054,7 +29054,7 @@ conf334 3.1381527329 0 83.294165025 0.6058349750000019 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf335 3.30692068622 0 83.186666675 0.46999998750000316 @@ -29141,7 +29141,7 @@ conf335 3.30692068622 0 83.186666675 0.46999998750000316 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf336 2.55450450958 0 83.429165825 0.4708341750000017 @@ -29227,8 +29227,8 @@ conf336 2.55450450958 0 83.429165825 0.4708341750000017 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf337 3.2983460176 0 83.409999525 0.49000047499999655 @@ -29315,7 +29315,7 @@ conf337 3.2983460176 0 83.409999525 0.49000047499999655 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf338 2.40445977697 0 83.72166715 0.1783328499999982 @@ -29401,8 +29401,8 @@ conf338 2.40445977697 0 83.72166715 0.1783328499999982 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf339 2.47649388334 0 83.3458338 0.5541662000000059 @@ -29488,8 +29488,8 @@ conf339 2.47649388334 0 83.3458338 0.5541662000000059 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf340 2.80986450982 0 83.6108329 0.28916709999999457 @@ -29576,7 +29576,7 @@ conf340 2.80986450982 0 83.6108329 0.28916709999999457 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf341 3.19996945711 0 82.20916635 1.9362504749999943 @@ -29663,7 +29663,7 @@ conf341 3.19996945711 0 82.20916635 1.9362504749999943 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf342 3.15022966077 0 82.3558336 1.7162496000000047 @@ -29750,7 +29750,7 @@ conf342 3.15022966077 0 82.3558336 1.7162496000000047 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf343 3.18630666863 0 82.309166375 1.7862504374999943 @@ -29837,7 +29837,7 @@ conf343 3.18630666863 0 82.309166375 1.7862504374999943 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf344 3.09659686575 0 82.7841664 1.0737503999999944 @@ -29924,7 +29924,7 @@ conf344 3.09659686575 0 82.7841664 1.0737503999999944 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf345 2.98635440336 0 82.5666672 1.3999992000000034 @@ -30011,7 +30011,7 @@ conf345 2.98635440336 0 82.5666672 1.3999992000000034 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf346 2.96599083939 0 83.289166375 0.6108336250000065 @@ -30098,7 +30098,7 @@ conf346 2.96599083939 0 83.289166375 0.6108336250000065 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf347 3.31764009092 0 83.229165675 0.6708343249999956 @@ -30185,7 +30185,7 @@ conf347 3.31764009092 0 83.229165675 0.6708343249999956 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf348 3.24530593811 0 82.744166375 1.133750437499991 @@ -30272,7 +30272,7 @@ conf348 3.24530593811 0 82.744166375 1.133750437499991 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf349 3.1672312347 0 82.32083345 1.7687498250000075 @@ -30359,7 +30359,7 @@ conf349 3.1672312347 0 82.32083345 1.7687498250000075 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf350 3.35194019608 0 82.245000875 1.8824986874999965 @@ -30446,7 +30446,7 @@ conf350 3.35194019608 0 82.245000875 1.8824986874999965 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf351 3.0778774074 0 82.307499275 1.7887510875000032 @@ -30533,7 +30533,7 @@ conf351 3.0778774074 0 82.307499275 1.7887510875000032 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf352 3.33260277956 0 83.4599995 0.440000500000005 @@ -30620,7 +30620,7 @@ conf352 3.33260277956 0 83.4599995 0.440000500000005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf353 3.24103387077 0 82.2833328 1.825000800000005 @@ -30707,7 +30707,7 @@ conf353 3.24103387077 0 82.2833328 1.825000800000005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf354 3.18069340099 0 82.5133333 1.480000050000001 @@ -30794,7 +30794,7 @@ conf354 3.18069340099 0 82.5133333 1.480000050000001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf355 3.07429636563 0 82.3416668 1.737499800000002 @@ -30881,7 +30881,7 @@ conf355 3.07429636563 0 82.3416668 1.737499800000002 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf356 3.15235879862 0 82.336666425 1.7450003624999937 @@ -30968,7 +30968,7 @@ conf356 3.15235879862 0 82.336666425 1.7450003624999937 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf357 3.24530593811 0 82.67416655 1.238750175000007 @@ -31055,7 +31055,7 @@ conf357 3.24530593811 0 82.67416655 1.238750175000007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf358 4.76954246445 0 82.019166125 2.221250812500003 @@ -31142,7 +31142,7 @@ conf358 4.76954246445 0 82.019166125 2.221250812500003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf359 4.449901633 0 81.9024998 2.3962502999999984 @@ -31229,7 +31229,7 @@ conf359 4.449901633 0 81.9024998 2.3962502999999984 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf360 4.34207312855 0 82.398333475 1.6524997874999983 @@ -31316,7 +31316,7 @@ conf360 4.34207312855 0 82.398333475 1.6524997874999983 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf361 3.91922383284 0 82.130833025 2.0537504624999983 @@ -31403,7 +31403,7 @@ conf361 3.91922383284 0 82.130833025 2.0537504624999983 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf362 4.15854174264 0 82.52416725 1.4637491249999925 @@ -31490,7 +31490,7 @@ conf362 4.15854174264 0 82.52416725 1.4637491249999925 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf363 3.96230081862 0 82.055000075 2.167499887500007 @@ -31577,7 +31577,7 @@ conf363 3.96230081862 0 82.055000075 2.167499887500007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf364 2.79832270858 0 82.564165975 1.4037510375000082 @@ -31663,8 +31663,8 @@ conf364 2.79832270858 0 82.564165975 1.4037510375000082 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf365 3.3989410127 0 81.7099991 2.685001349999993 @@ -31751,7 +31751,7 @@ conf365 3.3989410127 0 81.7099991 2.685001349999993 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf366 3.76110445166 0 82.480833025 1.5287504625000068 @@ -31838,7 +31838,7 @@ conf366 3.76110445166 0 82.480833025 1.5287504625000068 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf367 5.19355302987 0 81.9524996 2.321250600000006 @@ -31925,7 +31925,7 @@ conf367 5.19355302987 0 81.9524996 2.321250600000006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf368 3.31882787728 0 82.1508331 2.0237503500000003 @@ -32012,7 +32012,7 @@ conf368 3.31882787728 0 82.1508331 2.0237503500000003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf369 4.96680102787 0 82.078333275 2.1325000874999915 @@ -32099,7 +32099,7 @@ conf369 4.96680102787 0 82.078333275 2.1325000874999915 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf370 2.24298393464 0 82.670833925 1.243749112500005 @@ -32186,7 +32186,7 @@ conf370 2.24298393464 0 82.670833925 1.243749112500005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf371 5.43694808897 0 82.025000375 2.212499437499993 @@ -32273,7 +32273,7 @@ conf371 5.43694808897 0 82.025000375 2.212499437499993 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf372 4.67197908065 0 83.112500825 0.5812487625000031 @@ -32360,7 +32360,7 @@ conf372 4.67197908065 0 83.112500825 0.5812487625000031 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf373 5.98685011161 0 82.0716662 2.1425007000000065 @@ -32447,7 +32447,7 @@ conf373 5.98685011161 0 82.0716662 2.1425007000000065 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf374 4.95751357757 0 82.541666875 1.4374996874999937 @@ -32534,7 +32534,7 @@ conf374 4.95751357757 0 82.541666875 1.4374996874999937 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf375 4.7705434053 0 82.6366661 1.295000850000001 @@ -32621,7 +32621,7 @@ conf375 4.7705434053 0 82.6366661 1.295000850000001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf376 6.11253055706 0 81.839166625 2.4912500624999936 @@ -32708,7 +32708,7 @@ conf376 6.11253055706 0 81.839166625 2.4912500624999936 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf377 5.13531236708 0 81.947499425 2.3287508624999944 @@ -32795,7 +32795,7 @@ conf377 5.13531236708 0 81.947499425 2.3287508624999944 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf378 5.68419530524 0 81.885832675 2.4212509874999952 @@ -32882,7 +32882,7 @@ conf378 5.68419530524 0 81.885832675 2.4212509874999952 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf379 3.3989410127 0 82.595833525 1.356249712499995 @@ -32969,7 +32969,7 @@ conf379 3.3989410127 0 82.595833525 1.356249712499995 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf380 3.3989410127 0 82.13083365 2.0537495250000006 @@ -33056,7 +33056,7 @@ conf380 3.3989410127 0 82.13083365 2.0537495250000006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf381 5.26587015 0 82.03000015 2.2049997749999903 @@ -33143,7 +33143,7 @@ conf381 5.26587015 0 82.03000015 2.2049997749999903 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf382 5.8542348193 0 81.706667125 2.6899993125000066 @@ -33230,7 +33230,7 @@ conf382 5.8542348193 0 81.706667125 2.6899993125000066 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf383 3.12871807395 0 81.815833525 2.5262497124999967 @@ -33317,7 +33317,7 @@ conf383 3.12871807395 0 81.815833525 2.5262497124999967 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf384 6.01904155181 0 81.811666275 2.53250058750001 @@ -33404,7 +33404,7 @@ conf384 6.01904155181 0 81.811666275 2.53250058750001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf385 4.20358583892 0 81.661666975 2.7574995374999958 @@ -33491,7 +33491,7 @@ conf385 4.20358583892 0 81.661666975 2.7574995374999958 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf386 3.91060095759 0 82.50666635 1.490000474999995 @@ -33578,7 +33578,7 @@ conf386 3.91060095759 0 82.50666635 1.490000474999995 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf387 3.75751334685 0 82.062499925 2.156250112500004 @@ -33664,8 +33664,8 @@ conf387 3.75751334685 0 82.062499925 2.156250112500004 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf388 6.07933410004 0 82.0766671 2.134999350000008 @@ -33752,7 +33752,7 @@ conf388 6.07933410004 0 82.0766671 2.134999350000008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf389 5.73440892644 0 81.7983341 2.552498849999992 @@ -33839,7 +33839,7 @@ conf389 5.73440892644 0 81.7983341 2.552498849999992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf390 2.11827448839 0 82.49083305 1.5137504249999907 @@ -33925,8 +33925,8 @@ conf390 2.11827448839 0 82.49083305 1.5137504249999907 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf391 5.11910106906 0 82.3866668 1.6699997999999994 @@ -34013,7 +34013,7 @@ conf391 5.11910106906 0 82.3866668 1.6699997999999994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf392 3.3989410127 0 82.1033331 2.0950003499999994 @@ -34100,7 +34100,7 @@ conf392 3.3989410127 0 82.1033331 2.0950003499999994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf393 3.25823638691 0 82.740000525 1.1399992125000011 @@ -34187,7 +34187,7 @@ conf393 3.25823638691 0 82.740000525 1.1399992125000011 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf394 2.87183708038 0 82.43166675 1.6024998749999924 @@ -34274,7 +34274,7 @@ conf394 2.87183708038 0 82.43166675 1.6024998749999924 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf395 6.09920856411 0 81.79083275 2.5637508749999895 @@ -34361,7 +34361,7 @@ conf395 6.09920856411 0 81.79083275 2.5637508749999895 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf396 5.05299439803 0 81.874166875 2.4387496874999997 @@ -34448,7 +34448,7 @@ conf396 5.05299439803 0 81.874166875 2.4387496874999997 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf397 5.55984761608 0 81.838333975 2.4924990375000036 @@ -34535,7 +34535,7 @@ conf397 5.55984761608 0 81.838333975 2.4924990375000036 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf398 6.20896956368 0 81.764166375 2.603750437499997 @@ -34622,7 +34622,7 @@ conf398 6.20896956368 0 81.764166375 2.603750437499997 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf399 2.08997085298 0 83.324998925 0.5750010749999973 @@ -34708,8 +34708,8 @@ conf399 2.08997085298 0 83.324998925 0.5750010749999973 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf400 2.67909425977 0 83.4074999 0.4925000999999952 @@ -34796,7 +34796,7 @@ conf400 2.67909425977 0 83.4074999 0.4925000999999952 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf401 3.39279520729 0 83.512500375 0.3874996249999981 @@ -34883,7 +34883,7 @@ conf401 3.39279520729 0 83.512500375 0.3874996249999981 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf402 3.74096285582 0 83.342499875 0.5575001249999986 @@ -34970,7 +34970,7 @@ conf402 3.74096285582 0 83.342499875 0.5575001249999986 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf403 2.72520189649 0 83.285000675 0.6149993249999938 @@ -35057,7 +35057,7 @@ conf403 2.72520189649 0 83.285000675 0.6149993249999938 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf404 2.47020919834 0 83.485832825 0.4141671749999972 @@ -35143,8 +35143,8 @@ conf404 2.47020919834 0 83.485832825 0.4141671749999972 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf405 3.52973327747 0 83.579999725 0.3200002750000067 @@ -35231,7 +35231,7 @@ conf405 3.52973327747 0 83.579999725 0.3200002750000067 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf406 3.16324896856 0 83.229167325 0.6708326749999941 @@ -35318,7 +35318,7 @@ conf406 3.16324896856 0 83.229167325 0.6708326749999941 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf407 3.53910737775 0 83.537499625 0.36250037500000476 @@ -35405,7 +35405,7 @@ conf407 3.53910737775 0 83.537499625 0.36250037500000476 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf408 2.46717848922 0 83.366667525 0.5333324750000031 @@ -35491,8 +35491,8 @@ conf408 2.46717848922 0 83.366667525 0.5333324750000031 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf409 3.14520849175 0 83.46833325 0.43166674999999943 @@ -35579,7 +35579,7 @@ conf409 3.14520849175 0 83.46833325 0.43166674999999943 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf410 3.73983800903 0 83.396666425 0.5033335749999935 @@ -35666,7 +35666,7 @@ conf410 3.73983800903 0 83.396666425 0.5033335749999935 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf411 2.69718545933 0 83.42249985 0.4775001500000059 @@ -35753,7 +35753,7 @@ conf411 2.69718545933 0 83.42249985 0.4775001500000059 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf412 3.45563746073 0 83.18083305 0.4787504249999941 @@ -35840,7 +35840,7 @@ conf412 3.45563746073 0 83.18083305 0.4787504249999941 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf413 3.81594846135 0 83.05333355 0.6699996749999926 @@ -35927,7 +35927,7 @@ conf413 3.81594846135 0 83.05333355 0.6699996749999926 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf414 3.22592627458 0 83.51083305 0.3891669499999978 @@ -36014,7 +36014,7 @@ conf414 3.22592627458 0 83.51083305 0.3891669499999978 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf415 3.32121276575 0 83.50249925 0.3975007500000004 @@ -36101,7 +36101,7 @@ conf415 3.32121276575 0 83.50249925 0.3975007500000004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf416 2.50486697002 0 83.28333315 0.6166668499999958 @@ -36187,8 +36187,8 @@ conf416 2.50486697002 0 83.28333315 0.6166668499999958 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf417 3.43033061199 0 83.376665675 0.5233343250000019 @@ -36275,7 +36275,7 @@ conf417 3.43033061199 0 83.376665675 0.5233343250000019 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf418 3.55185414537 0 83.323333775 0.5766662250000053 @@ -36362,7 +36362,7 @@ conf418 3.55185414537 0 83.323333775 0.5766662250000053 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf419 3.45180134988 0 83.13416695 0.5487495750000093 @@ -36449,7 +36449,7 @@ conf419 3.45180134988 0 83.13416695 0.5487495750000093 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf420 2.51737484435 0 83.319999875 0.5800001250000065 @@ -36535,8 +36535,8 @@ conf420 2.51737484435 0 83.319999875 0.5800001250000065 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf421 2.47649388334 0 83.281667075 0.6183329249999986 @@ -36622,8 +36622,8 @@ conf421 2.47649388334 0 83.281667075 0.6183329249999986 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf422 3.52973327747 0 83.4341669 0.4658331000000061 @@ -36710,7 +36710,7 @@ conf422 3.52973327747 0 83.4341669 0.4658331000000061 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf423 3.30543425366 0 83.2149995 0.6850004999999953 @@ -36797,7 +36797,7 @@ conf423 3.30543425366 0 83.2149995 0.6850004999999953 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf424 3.69546000476 0 83.3700008 0.5299992000000003 @@ -36884,7 +36884,7 @@ conf424 3.69546000476 0 83.3700008 0.5299992000000003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf425 2.54866692533 0 83.23500055 0.6649994500000048 @@ -36970,8 +36970,8 @@ conf425 2.54866692533 0 83.23500055 0.6649994500000048 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf426 2.52414632919 0 83.30499955 0.5950004499999949 @@ -37057,8 +37057,8 @@ conf426 2.52414632919 0 83.30499955 0.5950004499999949 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf427 3.67819030212 0 83.410000125 0.48999987500000375 @@ -37145,7 +37145,7 @@ conf427 3.67819030212 0 83.410000125 0.48999987500000375 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf428 3.66653335987 0 83.6833336 0.2166664000000026 @@ -37232,7 +37232,7 @@ conf428 3.66653335987 0 83.6833336 0.2166664000000026 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf429 2.5369425715 0 83.452499925 0.4475000750000021 @@ -37318,8 +37318,8 @@ conf429 2.5369425715 0 83.452499925 0.4475000750000021 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf430 2.47501547765 0 83.296666925 0.6033330750000033 @@ -37405,8 +37405,8 @@ conf430 2.47501547765 0 83.296666925 0.6033330750000033 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf431 2.73512859106 0 83.1441661 0.5337508499999899 @@ -37492,8 +37492,8 @@ conf431 2.73512859106 0 83.1441661 0.5337508499999899 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf432 3.73049052619 0 83.43333345 0.4666665499999937 @@ -37580,7 +37580,7 @@ conf432 3.73049052619 0 83.43333345 0.4666665499999937 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf433 3.11116327032 0 83.4500002 0.44999979999999484 @@ -37667,7 +37667,7 @@ conf433 3.11116327032 0 83.4500002 0.44999979999999484 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf434 3.59288986667 0 83.6958339 0.20416610000000335 @@ -37754,7 +37754,7 @@ conf434 3.59288986667 0 83.6958339 0.20416610000000335 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf435 3.69363094479 0 83.466666825 0.4333331749999957 @@ -37841,7 +37841,7 @@ conf435 3.69363094479 0 83.466666825 0.4333331749999957 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf436 2.44405144164 0 83.6624992 0.23750080000000084 @@ -37927,8 +37927,8 @@ conf436 2.44405144164 0 83.6624992 0.23750080000000084 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf437 2.52414632919 0 83.298332625 0.6016673749999996 @@ -38014,8 +38014,8 @@ conf437 2.52414632919 0 83.298332625 0.6016673749999996 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf438 3.43724518747 0 83.469165875 0.4308341249999984 @@ -38102,7 +38102,7 @@ conf438 3.43724518747 0 83.469165875 0.4308341249999984 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf439 2.47403085411 0 83.32166695 0.5783330500000062 @@ -38188,8 +38188,8 @@ conf439 2.47403085411 0 83.32166695 0.5783330500000062 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf440 3.10613698764 0 83.718333025 0.18166697499999318 @@ -38276,7 +38276,7 @@ conf440 3.10613698764 0 83.718333025 0.18166697499999318 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf441 3.43696375659 0 83.7041672 0.19583279999999947 @@ -38363,7 +38363,7 @@ conf441 3.43696375659 0 83.7041672 0.19583279999999947 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf442 2.30377764101 0 83.07916665 0.6312500249999928 @@ -38450,7 +38450,7 @@ conf442 2.30377764101 0 83.07916665 0.6312500249999928 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf443 3.86195191894 0 82.57083345 1.3937498250000075 @@ -38537,7 +38537,7 @@ conf443 3.86195191894 0 82.57083345 1.3937498250000075 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf444 3.42141563349 0 82.7016671 1.1974993500000082 @@ -38624,7 +38624,7 @@ conf444 3.42141563349 0 82.7016671 1.1974993500000082 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf445 3.18878419794 0 82.9391669 0.8412496499999946 @@ -38711,7 +38711,7 @@ conf445 3.18878419794 0 82.9391669 0.8412496499999946 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf446 3.95857024721 0 83.29499975 0.6050002499999977 @@ -38798,7 +38798,7 @@ conf446 3.95857024721 0 83.29499975 0.6050002499999977 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf447 3.42141563349 0 82.6366663 1.2950005499999975 @@ -38885,7 +38885,7 @@ conf447 3.42141563349 0 82.6366663 1.2950005499999975 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf448 3.7537079845 0 82.355000175 1.717499737499999 @@ -38972,7 +38972,7 @@ conf448 3.7537079845 0 82.355000175 1.717499737499999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf449 3.89976572994 0 82.8266675 1.0099987500000012 @@ -39059,7 +39059,7 @@ conf449 3.89976572994 0 82.8266675 1.0099987500000012 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf450 3.70562610654 0 82.665833675 1.2512494874999973 @@ -39146,7 +39146,7 @@ conf450 3.70562610654 0 82.665833675 1.2512494874999973 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf451 3.88737706866 0 82.85666655 0.9650001750000001 @@ -39233,7 +39233,7 @@ conf451 3.88737706866 0 82.85666655 0.9650001750000001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf452 4.09696503312 0 83.220833025 0.6791669749999955 @@ -39320,7 +39320,7 @@ conf452 4.09696503312 0 83.220833025 0.6791669749999955 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf453 4.11156497342 0 82.35666615 1.7150007750000071 @@ -39407,7 +39407,7 @@ conf453 4.11156497342 0 82.35666615 1.7150007750000071 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf454 3.8410452777 0 83.389999125 0.5100008749999972 @@ -39494,7 +39494,7 @@ conf454 3.8410452777 0 83.389999125 0.5100008749999972 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf455 3.71389879516 0 82.65583325 1.2662501249999991 @@ -39581,7 +39581,7 @@ conf455 3.71389879516 0 82.65583325 1.2662501249999991 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf456 3.12961658197 0 82.4508332 1.573750199999992 @@ -39668,7 +39668,7 @@ conf456 3.12961658197 0 82.4508332 1.573750199999992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf457 3.54734999035 0 83.430000575 0.469999425000006 @@ -39755,7 +39755,7 @@ conf457 3.54734999035 0 83.430000575 0.469999425000006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf458 3.86395152513 0 82.4875004 1.5187493999999973 @@ -39842,7 +39842,7 @@ conf458 3.86395152513 0 82.4875004 1.5187493999999973 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf459 2.89974302229 0 82.382499875 1.6762501875000098 @@ -39928,8 +39928,8 @@ conf459 2.89974302229 0 82.382499875 1.6762501875000098 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf460 3.48886042646 0 82.7250005 1.1624992500000104 @@ -40016,7 +40016,7 @@ conf460 3.48886042646 0 82.7250005 1.1624992500000104 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf461 4.09292044776 0 83.33333365 0.5666663499999999 @@ -40103,7 +40103,7 @@ conf461 4.09292044776 0 83.33333365 0.5666663499999999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf462 3.97558461307 0 82.384999075 1.6725013875000059 @@ -40190,7 +40190,7 @@ conf462 3.97558461307 0 82.384999075 1.6725013875000059 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf463 4.09022849113 0 83.3025002 0.5974998000000028 @@ -40277,7 +40277,7 @@ conf463 4.09022849113 0 83.3025002 0.5974998000000028 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf464 3.24530593811 0 82.56083355 1.4087496750000028 @@ -40364,7 +40364,7 @@ conf464 3.24530593811 0 82.56083355 1.4087496750000028 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf465 3.78717886042 0 82.84083325 0.9887501249999957 @@ -40451,7 +40451,7 @@ conf465 3.78717886042 0 82.84083325 0.9887501249999957 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf466 3.93879714412 0 82.830000125 1.004999812500003 @@ -40538,7 +40538,7 @@ conf466 3.93879714412 0 82.830000125 1.004999812500003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf467 4.10835433149 0 83.284166175 0.6158338250000043 @@ -40625,7 +40625,7 @@ conf467 4.10835433149 0 83.284166175 0.6158338250000043 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf468 3.90747388907 0 83.33583325 0.5641667500000068 @@ -40712,7 +40712,7 @@ conf468 3.90747388907 0 83.33583325 0.5641667500000068 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf469 2.53859661959 0 82.6516662 1.272500700000009 @@ -40798,8 +40798,8 @@ conf469 2.53859661959 0 82.6516662 1.272500700000009 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf470 4.12303570384 0 82.400000275 1.6499995875000053 @@ -40886,7 +40886,7 @@ conf470 4.12303570384 0 82.400000275 1.6499995875000053 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf471 2.4825808753 0 83.890000725 0.009999275000006969 @@ -40973,7 +40973,7 @@ conf471 2.4825808753 0 83.890000725 0.009999275000006969 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf472 4.03091892409 0 83.015833475 0.7262497875000093 @@ -41060,7 +41060,7 @@ conf472 4.03091892409 0 83.015833475 0.7262497875000093 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf473 3.94573901698 0 82.7941671 1.0587493500000065 @@ -41147,7 +41147,7 @@ conf473 3.94573901698 0 82.7941671 1.0587493500000065 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf474 3.81804847244 0 82.634166775 1.2987498375000044 @@ -41234,7 +41234,7 @@ conf474 3.81804847244 0 82.634166775 1.2987498375000044 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf475 3.51197806787 0 82.906666375 0.8900004374999995 @@ -41321,7 +41321,7 @@ conf475 3.51197806787 0 82.906666375 0.8900004374999995 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf476 3.35148515003 0 82.644166625 1.2837500625000047 @@ -41408,7 +41408,7 @@ conf476 3.35148515003 0 82.644166625 1.2837500625000047 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf477 3.64680056168 0 82.7950001 1.0574998500000063 @@ -41495,7 +41495,7 @@ conf477 3.64680056168 0 82.7950001 1.0574998500000063 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf478 3.72000418322 0 83.456667675 0.4433323249999944 @@ -41582,7 +41582,7 @@ conf478 3.72000418322 0 83.456667675 0.4433323249999944 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf479 4.13277783134 0 82.530832425 1.4537513625000003 @@ -41669,7 +41669,7 @@ conf479 4.13277783134 0 82.530832425 1.4537513625000003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf480 4.26183413039 0 82.714165825 1.1787512625000076 @@ -41756,7 +41756,7 @@ conf480 4.26183413039 0 82.714165825 1.1787512625000076 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf481 3.23620317817 0 83.55666655 0.34333344999999726 @@ -41843,7 +41843,7 @@ conf481 3.23620317817 0 83.55666655 0.34333344999999726 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf482 3.42306292045 0 82.685000625 1.222499062499999 @@ -41930,7 +41930,7 @@ conf482 3.42306292045 0 82.685000625 1.222499062499999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf483 4.61790687055 0 82.47083265 1.5437510249999917 @@ -42017,7 +42017,7 @@ conf483 4.61790687055 0 82.47083265 1.5437510249999917 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf484 3.80271470043 0 82.503333875 1.4949991875000066 @@ -42104,7 +42104,7 @@ conf484 3.80271470043 0 82.503333875 1.4949991875000066 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf485 4.0096141492 0 82.503333175 1.4950002375000082 @@ -42191,7 +42191,7 @@ conf485 4.0096141492 0 82.503333175 1.4950002375000082 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf486 3.85955229037 0 82.624166675 1.3137499875000032 @@ -42278,7 +42278,7 @@ conf486 3.85955229037 0 82.624166675 1.3137499875000032 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf487 3.12961658197 0 82.385832425 1.6712513624999943 @@ -42365,7 +42365,7 @@ conf487 3.12961658197 0 82.385832425 1.6712513624999943 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf488 4.74816504674 0 82.915833575 0.8762496375000097 @@ -42452,7 +42452,7 @@ conf488 4.74816504674 0 82.915833575 0.8762496375000097 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf489 2.33564385687 0 82.100833175 2.098750237499992 @@ -42538,8 +42538,8 @@ conf489 2.33564385687 0 82.100833175 2.098750237499992 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf490 6.31231852253 0 81.88666685 2.419999725000004 @@ -42626,7 +42626,7 @@ conf490 6.31231852253 0 81.88666685 2.419999725000004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf491 5.91056159856 0 82.027500225 2.2087496625000043 @@ -42713,7 +42713,7 @@ conf491 5.91056159856 0 82.027500225 2.2087496625000043 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf492 4.09026452693 0 81.93333315 2.3500002750000064 @@ -42800,7 +42800,7 @@ conf492 4.09026452693 0 81.93333315 2.3500002750000064 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf493 2.29057910951 0 82.68499995 1.2225000749999921 @@ -42887,7 +42887,7 @@ conf493 2.29057910951 0 82.68499995 1.2225000749999921 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf494 3.31826782794 0 81.898333475 2.4024997874999983 @@ -42974,7 +42974,7 @@ conf494 3.31826782794 0 81.898333475 2.4024997874999983 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf495 2.47740126923 0 81.938332325 2.342501512499993 @@ -43060,8 +43060,8 @@ conf495 2.47740126923 0 81.938332325 2.342501512499993 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf496 4.86557422781 0 81.960833525 2.3087497125000027 @@ -43148,7 +43148,7 @@ conf496 4.86557422781 0 81.960833525 2.3087497125000027 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf497 4.39304255921 0 82.727500175 1.1587497374999955 @@ -43235,7 +43235,7 @@ conf497 4.39304255921 0 82.727500175 1.1587497374999955 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf498 3.73256635056 0 82.05666695 2.16499957500001 @@ -43322,7 +43322,7 @@ conf498 3.73256635056 0 82.05666695 2.16499957500001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf499 6.20896956368 0 81.7933327 2.56000095000001 @@ -43409,7 +43409,7 @@ conf499 6.20896956368 0 81.7933327 2.56000095000001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf500 3.86268155745 0 81.934167525 2.34874871249999 @@ -43496,7 +43496,7 @@ conf500 3.86268155745 0 81.934167525 2.34874871249999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf501 6.01904155181 0 81.819999625 2.520000562500009 @@ -43583,7 +43583,7 @@ conf501 6.01904155181 0 81.819999625 2.520000562500009 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf502 6.35947216799 0 81.776667425 2.584998862500001 @@ -43670,7 +43670,7 @@ conf502 6.35947216799 0 81.776667425 2.584998862500001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf503 5.21477775932 0 81.994167525 2.258748712500008 @@ -43757,7 +43757,7 @@ conf503 5.21477775932 0 81.994167525 2.258748712500008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf504 3.4313336476 0 82.8816661 0.9275008499999942 @@ -43844,7 +43844,7 @@ conf504 3.4313336476 0 82.8816661 0.9275008499999942 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf505 3.68328404231 0 82.05750025 2.163749624999994 @@ -43931,7 +43931,7 @@ conf505 3.68328404231 0 82.05750025 2.163749624999994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf506 5.92836922932 0 81.877500125 2.433749812500004 @@ -44018,7 +44018,7 @@ conf506 5.92836922932 0 81.877500125 2.433749812500004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf507 3.80663695682 0 81.9374992 2.3437511999999927 @@ -44105,7 +44105,7 @@ conf507 3.80663695682 0 81.9374992 2.3437511999999927 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf508 5.05770084008 0 82.24583295 1.88125057500001 @@ -44192,7 +44192,7 @@ conf508 5.05770084008 0 82.24583295 1.88125057500001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf509 3.20777861875 0 82.08083205 2.128751925000003 @@ -44279,7 +44279,7 @@ conf509 3.20777861875 0 82.08083205 2.128751925000003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf510 3.45870051953 0 82.066666825 2.149999762500002 @@ -44366,7 +44366,7 @@ conf510 3.45870051953 0 82.066666825 2.149999762500002 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf511 5.69409031372 0 81.924167325 2.3637490125000014 @@ -44453,7 +44453,7 @@ conf511 5.69409031372 0 81.924167325 2.3637490125000014 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf512 6.06060941775 0 81.9683331 2.297500350000007 @@ -44540,7 +44540,7 @@ conf512 6.06060941775 0 81.9683331 2.297500350000007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf513 4.1918323886 0 82.824166075 1.0137508875000094 @@ -44627,7 +44627,7 @@ conf513 4.1918323886 0 82.824166075 1.0137508875000094 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf514 5.51311465307 0 82.0116664 2.2325004000000064 @@ -44714,7 +44714,7 @@ conf514 5.51311465307 0 82.0116664 2.2325004000000064 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf515 5.94347809389 0 81.9191666 2.3712501000000046 @@ -44801,7 +44801,7 @@ conf515 5.94347809389 0 81.9191666 2.3712501000000046 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf516 4.65273283316 0 83.041665875 0.6875011874999899 @@ -44888,7 +44888,7 @@ conf516 4.65273283316 0 83.041665875 0.6875011874999899 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf517 2.6302261544 0 83.335834275 0.5641657250000037 @@ -44975,7 +44975,7 @@ conf517 2.6302261544 0 83.335834275 0.5641657250000037 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf518 3.22579199405 0 83.4974991 0.40250090000000116 @@ -45062,7 +45062,7 @@ conf518 3.22579199405 0 83.4974991 0.40250090000000116 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf519 3.27967368224 0 83.130833075 0.5537503875000027 @@ -45149,7 +45149,7 @@ conf519 3.27967368224 0 83.130833075 0.5537503875000027 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf520 3.43425412049 0 83.53666685 0.3633331499999969 @@ -45236,7 +45236,7 @@ conf520 3.43425412049 0 83.53666685 0.3633331499999969 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf521 3.28200602128 0 83.10166715 0.5974992750000041 @@ -45323,7 +45323,7 @@ conf521 3.28200602128 0 83.10166715 0.5974992750000041 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf522 3.59684205595 0 83.177500325 0.48374951250000464 @@ -45410,7 +45410,7 @@ conf522 3.59684205595 0 83.177500325 0.48374951250000464 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf523 3.59452470708 0 83.314166525 0.5858334749999955 @@ -45497,7 +45497,7 @@ conf523 3.59452470708 0 83.314166525 0.5858334749999955 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf524 2.46849388159 0 83.525832375 0.3741676250000069 @@ -45583,8 +45583,8 @@ conf524 2.46849388159 0 83.525832375 0.3741676250000069 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf525 3.59984947477 0 83.574999275 0.3250007250000039 @@ -45671,7 +45671,7 @@ conf525 3.59984947477 0 83.574999275 0.3250007250000039 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf526 2.56221404472 0 83.107499125 0.5887513124999941 @@ -45758,7 +45758,7 @@ conf526 2.56221404472 0 83.107499125 0.5887513124999941 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf527 2.47649388334 0 83.346667275 0.5533327249999985 @@ -45844,8 +45844,8 @@ conf527 2.47649388334 0 83.346667275 0.5533327249999985 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf528 3.45475224289 0 83.1041657 0.5937514500000063 @@ -45932,7 +45932,7 @@ conf528 3.45475224289 0 83.1041657 0.5937514500000063 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf529 3.14847976496 0 83.299999275 0.6000007249999953 @@ -46019,7 +46019,7 @@ conf529 3.14847976496 0 83.299999275 0.6000007249999953 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf530 3.7245329101 0 82.9900001 0.7649998499999953 @@ -46106,7 +46106,7 @@ conf530 3.7245329101 0 82.9900001 0.7649998499999953 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf531 3.62667420737 0 83.133332425 0.5500013624999909 @@ -46193,7 +46193,7 @@ conf531 3.62667420737 0 83.133332425 0.5500013624999909 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf532 2.64505034404 0 83.412499775 0.48750022499999945 @@ -46280,7 +46280,7 @@ conf532 2.64505034404 0 83.412499775 0.48750022499999945 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf533 3.19448276073 0 83.324167425 0.5758325750000012 @@ -46367,7 +46367,7 @@ conf533 3.19448276073 0 83.324167425 0.5758325750000012 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf534 3.4521680168 0 83.34083295 0.5591670499999936 @@ -46454,7 +46454,7 @@ conf534 3.4521680168 0 83.34083295 0.5591670499999936 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf535 3.22348002932 0 83.42249985 0.4775001500000059 @@ -46541,7 +46541,7 @@ conf535 3.22348002932 0 83.42249985 0.4775001500000059 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf536 2.46542270496 0 83.481667075 0.41833292499999575 @@ -46627,8 +46627,8 @@ conf536 2.46542270496 0 83.481667075 0.41833292499999575 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf537 2.52778795522 0 83.486665925 0.4133340750000031 @@ -46714,8 +46714,8 @@ conf537 2.52778795522 0 83.486665925 0.4133340750000031 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf538 2.56221404472 0 83.144166975 0.5337495374999932 @@ -46801,8 +46801,8 @@ conf538 2.56221404472 0 83.144166975 0.5337495374999932 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf539 3.27823297285 0 83.20333445 0.6966655500000002 @@ -46889,7 +46889,7 @@ conf539 3.27823297285 0 83.20333445 0.6966655500000002 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf540 3.6180742183 0 82.60750005 1.338749925000002 @@ -46976,7 +46976,7 @@ conf540 3.6180742183 0 82.60750005 1.338749925000002 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf541 3.92533229567 0 83.4591669 0.4408331000000004 @@ -47063,7 +47063,7 @@ conf541 3.92533229567 0 83.4591669 0.4408331000000004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf542 3.58859383862 0 82.264166475 1.8537502875000058 @@ -47150,7 +47150,7 @@ conf542 3.58859383862 0 82.264166475 1.8537502875000058 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf543 3.22297604526 0 82.75916615 1.111250775000002 @@ -47237,7 +47237,7 @@ conf543 3.22297604526 0 82.75916615 1.111250775000002 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf544 3.66372023461 0 82.59166585 1.3625012250000026 @@ -47324,7 +47324,7 @@ conf544 3.66372023461 0 82.59166585 1.3625012250000026 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf545 3.91002002291 0 82.5483341 1.427498849999992 @@ -47411,7 +47411,7 @@ conf545 3.91002002291 0 82.5483341 1.427498849999992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf546 3.96917380416 0 82.402500725 1.6462489125000062 @@ -47498,7 +47498,7 @@ conf546 3.96917380416 0 82.402500725 1.6462489125000062 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf547 3.95854211657 0 82.256666125 1.865000812500007 @@ -47585,7 +47585,7 @@ conf547 3.95854211657 0 82.256666125 1.865000812500007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf548 3.66981520647 0 82.52583375 1.4612493749999942 @@ -47672,7 +47672,7 @@ conf548 3.66981520647 0 82.52583375 1.4612493749999942 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf549 3.8906919752 0 82.56250095 1.4062485749999993 @@ -47759,7 +47759,7 @@ conf549 3.8906919752 0 82.56250095 1.4062485749999993 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf550 3.80542946014 0 82.589166625 1.3662500624999936 @@ -47846,7 +47846,7 @@ conf550 3.80542946014 0 82.589166625 1.3662500624999936 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf551 4.01038027961 0 83.020833375 0.7187499375000073 @@ -47933,7 +47933,7 @@ conf551 4.01038027961 0 83.020833375 0.7187499375000073 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf552 3.54824634447 0 82.628332675 1.3075009875000063 @@ -48020,7 +48020,7 @@ conf552 3.54824634447 0 82.628332675 1.3075009875000063 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf553 3.99739578291 0 82.5425003 1.4362495499999994 @@ -48107,7 +48107,7 @@ conf553 3.99739578291 0 82.5425003 1.4362495499999994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf554 3.42612797341 0 82.64000035 1.289999475000009 @@ -48194,7 +48194,7 @@ conf554 3.42612797341 0 82.64000035 1.289999475000009 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf555 3.37950116826 0 82.5966667 1.3549999499999998 @@ -48281,7 +48281,7 @@ conf555 3.37950116826 0 82.5966667 1.3549999499999998 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf556 3.85260352333 0 82.516667575 1.4749986375000006 @@ -48368,7 +48368,7 @@ conf556 3.85260352333 0 82.516667575 1.4749986375000006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf557 3.70250486116 0 83.482500475 0.4174995250000052 @@ -48455,7 +48455,7 @@ conf557 3.70250486116 0 83.482500475 0.4174995250000052 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf558 3.85830895124 0 82.7925002 1.0612496999999905 @@ -48542,7 +48542,7 @@ conf558 3.85830895124 0 82.7925002 1.0612496999999905 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf559 3.08261368468 0 82.583333825 1.3749992625000047 @@ -48629,7 +48629,7 @@ conf559 3.08261368468 0 82.583333825 1.3749992625000047 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf560 3.84859290112 0 82.804166725 1.0437499124999974 @@ -48716,7 +48716,7 @@ conf560 3.84859290112 0 82.804166725 1.0437499124999974 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf561 3.88910440715 0 82.49000105 1.5149984249999946 @@ -48803,7 +48803,7 @@ conf561 3.88910440715 0 82.49000105 1.5149984249999946 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf562 3.77712344616 0 82.583332925 1.3750006125000098 @@ -48890,7 +48890,7 @@ conf562 3.77712344616 0 82.583332925 1.3750006125000098 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf563 3.3516889331 0 82.640000425 1.289999362500005 @@ -48977,7 +48977,7 @@ conf563 3.3516889331 0 82.640000425 1.289999362500005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf564 3.24530593811 0 82.568332375 1.3975014375000043 @@ -49064,7 +49064,7 @@ conf564 3.24530593811 0 82.568332375 1.3975014375000043 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf565 3.76898427543 0 82.60916635 1.336250475000007 @@ -49151,7 +49151,7 @@ conf565 3.76898427543 0 82.60916635 1.336250475000007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf566 3.44754596993 0 82.272499425 1.8412508624999901 @@ -49238,7 +49238,7 @@ conf566 3.44754596993 0 82.272499425 1.8412508624999901 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf567 4.05924378827 0 82.56500135 1.4024979749999957 @@ -49325,7 +49325,7 @@ conf567 4.05924378827 0 82.56500135 1.4024979749999957 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf568 3.93740416705 0 82.522498825 1.4662517625000007 @@ -49412,7 +49412,7 @@ conf568 3.93740416705 0 82.522498825 1.4662517625000007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf569 3.84132198203 0 82.5683344 1.3974984000000035 @@ -49499,7 +49499,7 @@ conf569 3.84132198203 0 82.5683344 1.3974984000000035 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf570 3.09230255687 0 82.6708328 1.2437508000000008 @@ -49586,7 +49586,7 @@ conf570 3.09230255687 0 82.6708328 1.2437508000000008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf571 3.7426849711 0 82.5908331 1.3637503500000037 @@ -49673,7 +49673,7 @@ conf571 3.7426849711 0 82.5908331 1.3637503500000037 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf572 2.86454312858 0 83.1341669 0.5487496500000049 @@ -49760,7 +49760,7 @@ conf572 2.86454312858 0 83.1341669 0.5487496500000049 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf573 3.8409412107 0 82.574167475 1.3887487875000062 @@ -49847,7 +49847,7 @@ conf573 3.8409412107 0 82.574167475 1.3887487875000062 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf574 3.42777980223 0 82.648334175 1.2774987374999967 @@ -49934,7 +49934,7 @@ conf574 3.42777980223 0 82.648334175 1.2774987374999967 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf575 3.40403815603 0 83.51166655 0.38833344999999897 @@ -50021,7 +50021,7 @@ conf575 3.40403815603 0 83.51166655 0.38833344999999897 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf576 3.88047602719 0 82.532500075 1.4512498874999977 @@ -50108,7 +50108,7 @@ conf576 3.88047602719 0 82.532500075 1.4512498874999977 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf577 3.77830204078 0 82.647500475 1.2787492874999984 @@ -50195,7 +50195,7 @@ conf577 3.77830204078 0 82.647500475 1.2787492874999984 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf578 3.42777980223 0 82.63249865 1.3012520249999966 @@ -50282,7 +50282,7 @@ conf578 3.42777980223 0 82.63249865 1.3012520249999966 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf579 4.04812478529 0 82.325000375 1.7624994374999972 @@ -50369,7 +50369,7 @@ conf579 4.04812478529 0 82.325000375 1.7624994374999972 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf580 3.42612797341 0 82.567500125 1.3987498125000073 @@ -50456,7 +50456,7 @@ conf580 3.42612797341 0 82.567500125 1.3987498125000073 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf581 2.71103437454 0 82.644166375 1.2837504375000037 @@ -50543,7 +50543,7 @@ conf581 2.71103437454 0 82.644166375 1.2837504375000037 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf582 2.71585198734 0 82.609166725 1.3362499125000085 @@ -50630,7 +50630,7 @@ conf582 2.71585198734 0 82.609166725 1.3362499125000085 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf583 3.94925800299 0 82.51000045 1.4849993249999898 @@ -50717,7 +50717,7 @@ conf583 3.94925800299 0 82.51000045 1.4849993249999898 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf584 5.94686699811 0 81.866666425 2.450000362499992 @@ -50804,7 +50804,7 @@ conf584 5.94686699811 0 81.866666425 2.450000362499992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf585 6.08581936049 0 81.75666715 2.6149992750000024 @@ -50891,7 +50891,7 @@ conf585 6.08581936049 0 81.75666715 2.6149992750000024 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf586 5.8159977702 0 81.991667725 2.262498412500001 @@ -50978,7 +50978,7 @@ conf586 5.8159977702 0 81.991667725 2.262498412500001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf587 5.84038818508 0 81.9858334 2.2712498999999937 @@ -51065,7 +51065,7 @@ conf587 5.84038818508 0 81.9858334 2.2712498999999937 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf588 3.16200541504 0 81.93916565 2.34125152499999 @@ -51152,7 +51152,7 @@ conf588 3.16200541504 0 81.93916565 2.34125152499999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf589 6.13324261561 0 82.027500175 2.2087497375 @@ -51239,7 +51239,7 @@ conf589 6.13324261561 0 82.027500175 2.2087497375 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf590 3.40390894839 0 82.398333225 1.6525001624999973 @@ -51326,7 +51326,7 @@ conf590 3.40390894839 0 82.398333225 1.6525001624999973 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf591 5.96588342505 0 81.875832675 2.436250987500003 @@ -51413,7 +51413,7 @@ conf591 5.96588342505 0 81.875832675 2.436250987500003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf592 6.06542582931 0 81.75916635 2.6112504749999985 @@ -51500,7 +51500,7 @@ conf592 6.06542582931 0 81.75916635 2.6112504749999985 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf593 3.75822433713 0 82.285832875 1.8212506875000045 @@ -51587,7 +51587,7 @@ conf593 3.75822433713 0 82.285832875 1.8212506875000045 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf594 4.83568092525 0 82.5116665 1.482500249999994 @@ -51674,7 +51674,7 @@ conf594 4.83568092525 0 82.5116665 1.482500249999994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf595 2.92350480095 0 82.088333675 2.117499487499998 @@ -51760,8 +51760,8 @@ conf595 2.92350480095 0 82.088333675 2.117499487499998 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf596 4.86061160899 0 82.13083405 2.0537489249999936 @@ -51848,7 +51848,7 @@ conf596 4.86061160899 0 82.13083405 2.0537489249999936 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf597 5.80915645539 0 82.07333335 2.139999975000002 @@ -51935,7 +51935,7 @@ conf597 5.80915645539 0 82.07333335 2.139999975000002 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf598 4.77219926546 0 82.145833575 2.0312496375000038 @@ -52022,7 +52022,7 @@ conf598 4.77219926546 0 82.145833575 2.0312496375000038 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf599 4.95642590255 0 82.177500425 1.9837493624999922 @@ -52109,7 +52109,7 @@ conf599 4.95642590255 0 82.177500425 1.9837493624999922 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf600 6.3125953848 0 81.8175009 2.5237486500000017 @@ -52196,7 +52196,7 @@ conf600 6.3125953848 0 81.8175009 2.5237486500000017 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf601 5.68676212758 0 81.9325008 2.3512488000000005 @@ -52283,7 +52283,7 @@ conf601 5.68676212758 0 81.9325008 2.3512488000000005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf602 4.87802723389 0 82.616667025 1.3249994625000028 @@ -52370,7 +52370,7 @@ conf602 4.87802723389 0 82.616667025 1.3249994625000028 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf603 4.24152951084 0 82.3408333 1.7387500500000002 @@ -52457,7 +52457,7 @@ conf603 4.24152951084 0 82.3408333 1.7387500500000002 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf604 4.86742481345 0 81.79666635 2.555000475000007 @@ -52544,7 +52544,7 @@ conf604 4.86742481345 0 81.79666635 2.555000475000007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf605 2.84079562042 0 81.7549991 2.6175013499999906 @@ -52631,7 +52631,7 @@ conf605 2.84079562042 0 81.7549991 2.6175013499999906 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf606 4.7209030777 0 82.50916645 1.4862503250000074 @@ -52718,7 +52718,7 @@ conf606 4.7209030777 0 82.50916645 1.4862503250000074 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf607 3.21429793651 0 82.037499825 2.1937502625000036 @@ -52804,8 +52804,8 @@ conf607 3.21429793651 0 82.037499825 2.1937502625000036 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf608 6.20896956368 0 81.806666375 2.540000437499991 @@ -52892,7 +52892,7 @@ conf608 6.20896956368 0 81.806666375 2.540000437499991 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf609 6.11253055706 0 81.9508336 2.3237496000000064 @@ -52979,7 +52979,7 @@ conf609 6.11253055706 0 81.9508336 2.3237496000000064 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf610 5.72814889622 0 82.070833225 2.143750162499998 @@ -53066,7 +53066,7 @@ conf610 5.72814889622 0 82.070833225 2.143750162499998 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf611 6.03912384738 0 81.9508329 2.323750650000008 @@ -53153,7 +53153,7 @@ conf611 6.03912384738 0 81.9508329 2.323750650000008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf612 4.82854259452 0 81.999166275 2.25125058750001 @@ -53240,7 +53240,7 @@ conf612 4.82854259452 0 81.999166275 2.25125058750001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf613 6.28005303148 0 81.999167275 2.2512490874999926 @@ -53327,7 +53327,7 @@ conf613 6.28005303148 0 81.999167275 2.2512490874999926 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf614 3.53322652378 0 81.8325003 2.50124954999999 @@ -53414,7 +53414,7 @@ conf614 3.53322652378 0 81.8325003 2.50124954999999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf615 5.54554314448 0 82.005833575 2.2412496375000046 @@ -53501,7 +53501,7 @@ conf615 5.54554314448 0 82.005833575 2.2412496375000046 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf616 5.92460400808 0 81.8916666 2.4125001000000097 @@ -53588,7 +53588,7 @@ conf616 5.92460400808 0 81.8916666 2.4125001000000097 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf617 6.24638811174 0 81.81916665 2.5212500250000005 @@ -53675,7 +53675,7 @@ conf617 6.24638811174 0 81.81916665 2.5212500250000005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf618 3.60668252472 0 82.4383338 1.5924993000000072 @@ -53762,7 +53762,7 @@ conf618 3.60668252472 0 82.4383338 1.5924993000000072 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf619 5.00597538776 0 82.5850009 1.3724986500000043 @@ -53849,7 +53849,7 @@ conf619 5.00597538776 0 82.5850009 1.3724986500000043 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf620 3.50178718632 0 82.0274992 2.208751200000009 @@ -53936,7 +53936,7 @@ conf620 3.50178718632 0 82.0274992 2.208751200000009 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf621 5.41498979223 0 81.926666575 2.360000137500002 @@ -54023,7 +54023,7 @@ conf621 5.41498979223 0 81.926666575 2.360000137500002 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf622 6.12963802972 0 81.939999775 2.340000337499994 @@ -54110,7 +54110,7 @@ conf622 6.12963802972 0 81.939999775 2.340000337499994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf623 2.84806440183 0 82.35833275 1.7125008749999964 @@ -54197,7 +54197,7 @@ conf623 2.84806440183 0 82.35833275 1.7125008749999964 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf624 4.53047135975 0 82.752499075 1.1212513874999956 @@ -54284,7 +54284,7 @@ conf624 4.53047135975 0 82.752499075 1.1212513874999956 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf625 6.45517159164 0 81.904999375 2.392500937499996 @@ -54371,7 +54371,7 @@ conf625 6.45517159164 0 81.904999375 2.392500937499996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf626 2.77533156099 0 83.51249885 0.38750114999999996 @@ -54458,7 +54458,7 @@ conf626 2.77533156099 0 83.51249885 0.38750114999999996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf627 2.67681442383 0 83.54249975 0.3575002499999954 @@ -54544,8 +54544,8 @@ conf627 2.67681442383 0 83.54249975 0.3575002499999954 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf628 3.34131522534 0 83.388333375 0.511666624999998 @@ -54632,7 +54632,7 @@ conf628 3.34131522534 0 83.388333375 0.511666624999998 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf629 3.91196048681 0 83.385832775 0.5141672250000028 @@ -54719,7 +54719,7 @@ conf629 3.91196048681 0 83.385832775 0.5141672250000028 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf630 3.67732734091 0 83.6216665 0.27833349999999657 @@ -54806,7 +54806,7 @@ conf630 3.67732734091 0 83.6216665 0.27833349999999657 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf631 3.17642385621 0 83.9708332 0.22916679999999873 @@ -54893,7 +54893,7 @@ conf631 3.17642385621 0 83.9708332 0.22916679999999873 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf632 3.7148603211 0 83.47666625 0.42333375000000617 @@ -54980,7 +54980,7 @@ conf632 3.7148603211 0 83.47666625 0.42333375000000617 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf633 3.550044405 0 83.379166975 0.5208330249999961 @@ -55067,7 +55067,7 @@ conf633 3.550044405 0 83.379166975 0.5208330249999961 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf634 3.89833699237 0 83.465000775 0.4349992250000071 @@ -55154,7 +55154,7 @@ conf634 3.89833699237 0 83.465000775 0.4349992250000071 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf635 3.41246964545 0 83.380834025 0.5191659750000014 @@ -55241,7 +55241,7 @@ conf635 3.41246964545 0 83.380834025 0.5191659750000014 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf636 2.74258180683 0 83.38166735 0.5183326499999993 @@ -55328,7 +55328,7 @@ conf636 2.74258180683 0 83.38166735 0.5183326499999993 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf637 3.19236717145 0 83.629166025 0.2708339749999965 @@ -55415,7 +55415,7 @@ conf637 3.19236717145 0 83.629166025 0.2708339749999965 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf638 2.66820276722 0 83.504999725 0.39500027499999535 @@ -55502,7 +55502,7 @@ conf638 2.66820276722 0 83.504999725 0.39500027499999535 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf639 3.91196048681 0 83.510833025 0.3891669750000034 @@ -55589,7 +55589,7 @@ conf639 3.91196048681 0 83.510833025 0.3891669750000034 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf640 3.6987123875 0 83.473332775 0.4266672249999971 @@ -55676,7 +55676,7 @@ conf640 3.6987123875 0 83.473332775 0.4266672249999971 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf641 2.6908653869 0 83.450833775 0.44916622499999337 @@ -55763,7 +55763,7 @@ conf641 2.6908653869 0 83.450833775 0.44916622499999337 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf642 3.21949064499 0 83.1749996 0.48750059999998996 @@ -55850,7 +55850,7 @@ conf642 3.21949064499 0 83.1749996 0.48750059999998996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf643 3.8540168116 0 83.3374999 0.5625001000000026 @@ -55937,7 +55937,7 @@ conf643 3.8540168116 0 83.3374999 0.5625001000000026 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf644 3.56007386967 0 83.6800005 0.21999949999999446 @@ -56024,7 +56024,7 @@ conf644 3.56007386967 0 83.6800005 0.21999949999999446 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf645 3.33812673629 0 83.398332625 0.5016673750000052 @@ -56111,7 +56111,7 @@ conf645 3.33812673629 0 83.398332625 0.5016673750000052 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf646 4.04048626029 0 83.339167475 0.5608325250000036 @@ -56198,7 +56198,7 @@ conf646 4.04048626029 0 83.339167475 0.5608325250000036 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf647 3.85170222236 0 83.446665375 0.4533346250000051 @@ -56285,7 +56285,7 @@ conf647 3.85170222236 0 83.446665375 0.4533346250000051 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf648 3.90854569412 0 83.439166475 0.46083352500000674 @@ -56372,7 +56372,7 @@ conf648 3.90854569412 0 83.439166475 0.46083352500000674 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf649 3.19820449438 0 83.620000625 0.27999937499999705 @@ -56459,7 +56459,7 @@ conf649 3.19820449438 0 83.620000625 0.27999937499999705 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf650 3.56169130222 0 83.4466666 0.45333339999999966 @@ -56546,7 +56546,7 @@ conf650 3.56169130222 0 83.4466666 0.45333339999999966 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf651 2.47649388334 0 83.3058326 0.5941673999999978 @@ -56632,8 +56632,8 @@ conf651 2.47649388334 0 83.3058326 0.5941673999999978 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf652 3.84319484171 0 83.5466665 0.3533334999999994 @@ -56720,7 +56720,7 @@ conf652 3.84319484171 0 83.5466665 0.3533334999999994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf653 3.61281589655 0 83.4683321 0.4316679000000022 @@ -56807,7 +56807,7 @@ conf653 3.61281589655 0 83.4683321 0.4316679000000022 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf654 3.45781979362 0 83.395832825 0.5041671750000006 @@ -56894,7 +56894,7 @@ conf654 3.45781979362 0 83.395832825 0.5041671750000006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf655 3.66427338342 0 83.350832525 0.5491674749999987 @@ -56981,7 +56981,7 @@ conf655 3.66427338342 0 83.350832525 0.5491674749999987 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf656 3.73380488971 0 83.555833725 0.34416627500000063 @@ -57068,7 +57068,7 @@ conf656 3.73380488971 0 83.555833725 0.34416627500000063 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf657 3.34017631976 0 83.901667075 0.29833292499999403 @@ -57155,7 +57155,7 @@ conf657 3.34017631976 0 83.901667075 0.29833292499999403 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf658 3.36416579236 0 83.4666676 0.43333240000000617 @@ -57242,7 +57242,7 @@ conf658 3.36416579236 0 83.4666676 0.43333240000000617 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf659 3.90051037143 0 83.377498975 0.5225010250000054 @@ -57329,7 +57329,7 @@ conf659 3.90051037143 0 83.377498975 0.5225010250000054 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf660 3.42864727797 0 83.366666 0.533334000000005 @@ -57416,7 +57416,7 @@ conf660 3.42864727797 0 83.366666 0.533334000000005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf661 4.00593124871 0 83.6191658 0.28083419999999537 @@ -57503,7 +57503,7 @@ conf661 4.00593124871 0 83.6191658 0.28083419999999537 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf662 3.19455366876 0 83.6241671 0.27583290000000604 @@ -57590,7 +57590,7 @@ conf662 3.19455366876 0 83.6241671 0.27583290000000604 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf663 3.73050433946 0 83.313333525 0.5866664749999956 @@ -57677,7 +57677,7 @@ conf663 3.73050433946 0 83.313333525 0.5866664749999956 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf664 3.4587813675 0 83.399999225 0.5000007749999981 @@ -57764,7 +57764,7 @@ conf664 3.4587813675 0 83.399999225 0.5000007749999981 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf665 3.04008546135 0 83.599166675 0.3008333249999936 @@ -57851,7 +57851,7 @@ conf665 3.04008546135 0 83.599166675 0.3008333249999936 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf666 3.78060333145 0 83.4550007 0.44499930000000065 @@ -57938,7 +57938,7 @@ conf666 3.78060333145 0 83.4550007 0.44499930000000065 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf667 3.61962110787 0 83.460833125 0.4391668750000065 @@ -58025,7 +58025,7 @@ conf667 3.61962110787 0 83.460833125 0.4391668750000065 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf668 3.8810965693 0 83.2749996 0.625000399999999 @@ -58112,7 +58112,7 @@ conf668 3.8810965693 0 83.2749996 0.625000399999999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf669 3.33861347291 0 83.625000425 0.27499957500000394 @@ -58199,7 +58199,7 @@ conf669 3.33861347291 0 83.625000425 0.27499957500000394 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf670 2.56753588524 0 83.6249996 0.2750004000000047 @@ -58285,8 +58285,8 @@ conf670 2.56753588524 0 83.6249996 0.2750004000000047 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf671 2.66966635352 0 83.505833375 0.39416662500000543 @@ -58373,7 +58373,7 @@ conf671 2.66966635352 0 83.505833375 0.39416662500000543 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf672 3.62734932603 0 83.393333 0.5066670000000016 @@ -58460,7 +58460,7 @@ conf672 3.62734932603 0 83.393333 0.5066670000000016 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf673 2.81757910985 0 83.399999475 0.5000005249999987 @@ -58546,8 +58546,8 @@ conf673 2.81757910985 0 83.399999475 0.5000005249999987 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf674 3.90909166028 0 83.35083335 0.549166649999998 @@ -58634,7 +58634,7 @@ conf674 3.90909166028 0 83.35083335 0.549166649999998 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf675 3.67412584098 0 83.899166975 0.0008330250000000428 @@ -58721,7 +58721,7 @@ conf675 3.67412584098 0 83.899166975 0.0008330250000000428 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf676 3.89711553068 0 83.522500025 0.37749997500000065 @@ -58808,7 +58808,7 @@ conf676 3.89711553068 0 83.522500025 0.37749997500000065 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf677 3.62570087392 0 83.388333425 0.511666575000001 @@ -58895,7 +58895,7 @@ conf677 3.62570087392 0 83.388333425 0.511666575000001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf678 3.07523718155 0 82.7808333 1.0787500500000036 @@ -58982,7 +58982,7 @@ conf678 3.07523718155 0 82.7808333 1.0787500500000036 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf679 4.13425005433 0 82.42083265 1.6187510250000088 @@ -59069,7 +59069,7 @@ conf679 4.13425005433 0 82.42083265 1.6187510250000088 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf680 3.61960948722 0 82.8216667 1.0174999500000084 @@ -59156,7 +59156,7 @@ conf680 3.61960948722 0 82.8216667 1.0174999500000084 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf681 3.77350891881 0 83.4066669 0.4933330999999953 @@ -59243,7 +59243,7 @@ conf681 3.77350891881 0 83.4066669 0.4933330999999953 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf682 3.29849329899 0 82.699167225 1.2012491625000052 @@ -59330,7 +59330,7 @@ conf682 3.29849329899 0 82.699167225 1.2012491625000052 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf683 2.53422103757 0 82.725833325 1.1612500125000054 @@ -59416,8 +59416,8 @@ conf683 2.53422103757 0 82.725833325 1.1612500125000054 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf684 3.07598772844 0 82.70083335 1.1987499750000055 @@ -59504,7 +59504,7 @@ conf684 3.07598772844 0 82.70083335 1.1987499750000055 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf685 2.9984674801 0 82.483333625 1.5249995624999997 @@ -59591,7 +59591,7 @@ conf685 2.9984674801 0 82.483333625 1.5249995624999997 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf686 2.69034021678 0 82.911667225 0.8824991624999967 @@ -59677,8 +59677,8 @@ conf686 2.69034021678 0 82.911667225 0.8824991624999967 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf687 3.6121448006 0 82.63666585 1.295001225 @@ -59765,7 +59765,7 @@ conf687 3.6121448006 0 82.63666585 1.295001225 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf688 3.72756318437 0 82.55749965 1.4137505250000046 @@ -59852,7 +59852,7 @@ conf688 3.72756318437 0 82.55749965 1.4137505250000046 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf689 3.55023099238 0 82.526666675 1.459999987499998 @@ -59939,7 +59939,7 @@ conf689 3.55023099238 0 82.526666675 1.459999987499998 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf690 3.42777980223 0 82.684999375 1.2225009374999942 @@ -60026,7 +60026,7 @@ conf690 3.42777980223 0 82.684999375 1.2225009374999942 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf691 4.84352798061 0 82.450834125 1.5737488124999999 @@ -60113,7 +60113,7 @@ conf691 4.84352798061 0 82.450834125 1.5737488124999999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf692 3.76776786291 0 82.559167175 1.4112492375000016 @@ -60200,7 +60200,7 @@ conf692 3.76776786291 0 82.559167175 1.4112492375000016 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf693 4.51358276297 0 82.44000035 1.589999474999992 @@ -60287,7 +60287,7 @@ conf693 4.51358276297 0 82.44000035 1.589999474999992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf694 3.72458652593 0 82.6208338 1.3187493000000003 @@ -60374,7 +60374,7 @@ conf694 3.72458652593 0 82.6208338 1.3187493000000003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf695 3.76348907779 0 82.8208328 1.0187507999999923 @@ -60461,7 +60461,7 @@ conf695 3.76348907779 0 82.8208328 1.0187507999999923 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf696 3.69520391434 0 82.767499375 1.0987509375000002 @@ -60548,7 +60548,7 @@ conf696 3.69520391434 0 82.767499375 1.0987509375000002 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf697 3.66560916957 0 82.67916755 1.2312486749999962 @@ -60635,7 +60635,7 @@ conf697 3.66560916957 0 82.67916755 1.2312486749999962 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf698 3.77027202063 0 82.488333725 1.517499412499994 @@ -60722,7 +60722,7 @@ conf698 3.77027202063 0 82.488333725 1.517499412499994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf699 3.75097348493 0 82.7166668 1.174999800000002 @@ -60809,7 +60809,7 @@ conf699 3.75097348493 0 82.7166668 1.174999800000002 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf700 2.45929083235 0 82.681665775 1.2275013375000015 @@ -60896,7 +60896,7 @@ conf700 2.45929083235 0 82.681665775 1.2275013375000015 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf701 3.23452197803 0 82.6866664 1.2200004000000106 @@ -60983,7 +60983,7 @@ conf701 3.23452197803 0 82.6866664 1.2200004000000106 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf702 3.78589104303 0 82.468333725 1.5474994125000094 @@ -61070,7 +61070,7 @@ conf702 3.78589104303 0 82.468333725 1.5474994125000094 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf703 3.67105883538 0 82.655833575 1.266249637499996 @@ -61157,7 +61157,7 @@ conf703 3.67105883538 0 82.655833575 1.266249637499996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf704 3.68966627876 0 82.65166675 1.272499874999994 @@ -61244,7 +61244,7 @@ conf704 3.68966627876 0 82.65166675 1.272499874999994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf705 3.76017408275 0 82.621666375 1.3175004374999943 @@ -61331,7 +61331,7 @@ conf705 3.76017408275 0 82.621666375 1.3175004374999943 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf706 3.24530593811 0 82.58416615 1.3737507749999978 @@ -61418,7 +61418,7 @@ conf706 3.24530593811 0 82.58416615 1.3737507749999978 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf707 2.8309510337 0 82.354167125 1.7187493124999946 @@ -61505,7 +61505,7 @@ conf707 2.8309510337 0 82.354167125 1.7187493124999946 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf708 3.65387014178 0 82.29333365 1.8099995250000092 @@ -61592,7 +61592,7 @@ conf708 3.65387014178 0 82.29333365 1.8099995250000092 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf709 3.71442465807 0 82.654166575 1.2687501374999925 @@ -61679,7 +61679,7 @@ conf709 3.71442465807 0 82.654166575 1.2687501374999925 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf710 3.14961884209 0 82.7824993 1.0762510500000033 @@ -61766,7 +61766,7 @@ conf710 3.14961884209 0 82.7824993 1.0762510500000033 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf711 2.83638434085 0 82.5541664 1.4187504000000004 @@ -61853,7 +61853,7 @@ conf711 2.83638434085 0 82.5541664 1.4187504000000004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf712 2.91601847724 0 82.573332825 1.3900007625000086 @@ -61940,7 +61940,7 @@ conf712 2.91601847724 0 82.573332825 1.3900007625000086 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf713 3.64186373922 0 82.65666695 1.2649995749999974 @@ -62027,7 +62027,7 @@ conf713 3.64186373922 0 82.65666695 1.2649995749999974 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf714 2.81202046932 0 82.533333575 1.4499996374999995 @@ -62114,7 +62114,7 @@ conf714 2.81202046932 0 82.533333575 1.4499996374999995 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf715 3.76108652872 0 82.46333325 1.5550001249999923 @@ -62201,7 +62201,7 @@ conf715 3.76108652872 0 82.46333325 1.5550001249999923 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf716 3.65879322305 0 82.645832475 1.281251287499991 @@ -62288,7 +62288,7 @@ conf716 3.65879322305 0 82.645832475 1.281251287499991 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf717 3.70045394085 0 82.521666475 1.4675002874999947 @@ -62375,7 +62375,7 @@ conf717 3.70045394085 0 82.521666475 1.4675002874999947 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf718 3.72520627099 0 82.66583245 1.2512513250000055 @@ -62462,7 +62462,7 @@ conf718 3.72520627099 0 82.66583245 1.2512513250000055 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf719 3.24756557937 0 82.614999375 1.3275009375000053 @@ -62549,7 +62549,7 @@ conf719 3.24756557937 0 82.614999375 1.3275009375000053 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf720 3.30885003192 0 82.649167625 1.276248562499994 @@ -62636,7 +62636,7 @@ conf720 3.30885003192 0 82.649167625 1.276248562499994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf721 4.87113873596 0 82.313334075 1.7799988874999997 @@ -62723,7 +62723,7 @@ conf721 4.87113873596 0 82.313334075 1.7799988874999997 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf722 2.99807106899 0 82.5941672 1.3587491999999983 @@ -62810,7 +62810,7 @@ conf722 2.99807106899 0 82.5941672 1.3587491999999983 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf723 2.72823552171 0 82.50916655 1.486250174999995 @@ -62897,7 +62897,7 @@ conf723 2.72823552171 0 82.50916655 1.486250174999995 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf724 2.80778843881 0 82.6516676 1.2724986000000058 @@ -62984,7 +62984,7 @@ conf724 2.80778843881 0 82.6516676 1.2724986000000058 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf725 3.78397139373 0 82.5358331 1.4462503499999926 @@ -63071,7 +63071,7 @@ conf725 3.78397139373 0 82.5358331 1.4462503499999926 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf726 3.54172278638 0 82.500833175 1.4987502375000048 @@ -63158,7 +63158,7 @@ conf726 3.54172278638 0 82.500833175 1.4987502375000048 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf727 3.24678797193 0 82.6966673 1.2049990499999979 @@ -63245,7 +63245,7 @@ conf727 3.24678797193 0 82.6966673 1.2049990499999979 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf728 3.25008734698 0 82.639999975 1.2900000375000076 @@ -63332,7 +63332,7 @@ conf728 3.25008734698 0 82.639999975 1.2900000375000076 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf729 3.45364629658 0 82.469166625 1.5462500625000004 @@ -63419,7 +63419,7 @@ conf729 3.45364629658 0 82.469166625 1.5462500625000004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf730 3.51186658454 0 82.663332725 1.2550009124999946 @@ -63506,7 +63506,7 @@ conf730 3.51186658454 0 82.663332725 1.2550009124999946 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf731 3.64560643018 0 82.46166725 1.5574991249999925 @@ -63593,7 +63593,7 @@ conf731 3.64560643018 0 82.46166725 1.5574991249999925 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf732 3.56631738856 0 82.4725002 1.5412497000000016 @@ -63680,7 +63680,7 @@ conf732 3.56631738856 0 82.4725002 1.5412497000000016 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf733 4.44852035759 0 82.728333075 1.1575003875000078 @@ -63767,7 +63767,7 @@ conf733 4.44852035759 0 82.728333075 1.1575003875000078 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf734 3.83632344443 0 81.790833625 2.563749562499993 @@ -63853,8 +63853,8 @@ conf734 3.83632344443 0 81.790833625 2.563749562499993 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf735 5.34047298453 0 81.87749975 2.4337503750000025 @@ -63941,7 +63941,7 @@ conf735 5.34047298453 0 81.87749975 2.4337503750000025 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf736 5.65205072298 0 81.823333 2.515000499999992 @@ -64028,7 +64028,7 @@ conf736 5.65205072298 0 81.823333 2.515000499999992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf737 3.09454478777 0 82.189167275 1.966249087499996 @@ -64115,7 +64115,7 @@ conf737 3.09454478777 0 82.189167275 1.966249087499996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf738 4.66234436114 0 81.882500525 2.4262492125000037 @@ -64202,7 +64202,7 @@ conf738 4.66234436114 0 81.882500525 2.4262492125000037 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf739 4.57988807369 0 82.72749975 1.1587503749999897 @@ -64289,7 +64289,7 @@ conf739 4.57988807369 0 82.72749975 1.1587503749999897 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf740 3.14265791087 0 82.407500075 1.6387498874999977 @@ -64376,7 +64376,7 @@ conf740 3.14265791087 0 82.407500075 1.6387498874999977 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf741 3.79743686888 0 82.55999965 1.410000525000008 @@ -64463,7 +64463,7 @@ conf741 3.79743686888 0 82.55999965 1.410000525000008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf742 5.37418600885 0 81.905000525 2.3924992124999918 @@ -64550,7 +64550,7 @@ conf742 5.37418600885 0 81.905000525 2.3924992124999918 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf743 5.04653302026 0 82.4066666 1.6400001000000088 @@ -64637,7 +64637,7 @@ conf743 5.04653302026 0 82.4066666 1.6400001000000088 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf744 4.13261299183 0 81.8124998 2.5312503000000035 @@ -64724,7 +64724,7 @@ conf744 4.13261299183 0 81.8124998 2.5312503000000035 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf745 5.72782322817 0 81.8375002 2.4937497000000093 @@ -64811,7 +64811,7 @@ conf745 5.72782322817 0 81.8375002 2.4937497000000093 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf746 5.05336757055 0 81.8708332 2.4437501999999895 @@ -64898,7 +64898,7 @@ conf746 5.05336757055 0 81.8708332 2.4437501999999895 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf747 5.39629361273 0 81.907500475 2.3887492874999907 @@ -64985,7 +64985,7 @@ conf747 5.39629361273 0 81.907500475 2.3887492874999907 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf748 4.5639539009 0 81.868333475 2.4474997875 @@ -65072,7 +65072,7 @@ conf748 4.5639539009 0 81.868333475 2.4474997875 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf749 2.48550160702 0 81.98249965 2.276250525000009 @@ -65158,8 +65158,8 @@ conf749 2.48550160702 0 81.98249965 2.276250525000009 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf750 3.36713317478 0 82.3433332 1.7350001999999947 @@ -65245,8 +65245,8 @@ conf750 3.36713317478 0 82.3433332 1.7350001999999947 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf751 4.25330235918 0 82.050833225 2.173750162499992 @@ -65333,7 +65333,7 @@ conf751 4.25330235918 0 82.050833225 2.173750162499992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf752 4.73785735024 0 82.43083295 1.6037505750000065 @@ -65420,7 +65420,7 @@ conf752 4.73785735024 0 82.43083295 1.6037505750000065 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf753 5.75116942647 0 81.810832725 2.533750912500004 @@ -65507,7 +65507,7 @@ conf753 5.75116942647 0 81.810832725 2.533750912500004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf754 4.08310313137 0 81.82083325 2.5187501249999897 @@ -65594,7 +65594,7 @@ conf754 4.08310313137 0 81.82083325 2.5187501249999897 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf755 3.69771415222 0 81.771667375 2.5924989374999896 @@ -65681,7 +65681,7 @@ conf755 3.69771415222 0 81.771667375 2.5924989374999896 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf756 4.5639539009 0 81.88916665 2.4162500249999894 @@ -65768,7 +65768,7 @@ conf756 4.5639539009 0 81.88916665 2.4162500249999894 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf757 3.61284787083 0 81.839166675 2.491249987499998 @@ -65854,8 +65854,8 @@ conf757 3.61284787083 0 81.839166675 2.491249987499998 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf758 3.14794852044 0 83.18249955 0.47625067499999574 @@ -65942,7 +65942,7 @@ conf758 3.14794852044 0 83.18249955 0.47625067499999574 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf759 3.61297523069 0 83.109166025 0.5862509625000101 @@ -66029,7 +66029,7 @@ conf759 3.61297523069 0 83.109166025 0.5862509625000101 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf760 3.0859019816 0 83.58333355 0.31666644999999394 @@ -66116,7 +66116,7 @@ conf760 3.0859019816 0 83.58333355 0.31666644999999394 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf761 3.48258407644 0 83.072499225 0.6412511624999979 @@ -66203,7 +66203,7 @@ conf761 3.48258407644 0 83.072499225 0.6412511624999979 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf762 2.35625127316 0 83.69083365 0.20916634999999817 @@ -66289,8 +66289,8 @@ conf762 2.35625127316 0 83.69083365 0.20916634999999817 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf763 3.82892289873 0 83.0383339 0.6924991500000033 @@ -66377,7 +66377,7 @@ conf763 3.82892289873 0 83.0383339 0.6924991500000033 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf764 3.23798952739 0 83.382500875 0.5174991249999948 @@ -66464,7 +66464,7 @@ conf764 3.23798952739 0 83.382500875 0.5174991249999948 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf765 2.46884718101 0 83.7450001 0.1549999000000014 @@ -66550,8 +66550,8 @@ conf765 2.46884718101 0 83.7450001 0.1549999000000014 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf766 2.89946744376 0 83.59416695 0.3058330499999983 @@ -66638,7 +66638,7 @@ conf766 2.89946744376 0 83.59416695 0.3058330499999983 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf767 3.9824491999 0 83.034165925 0.6987511125000054 @@ -66725,7 +66725,7 @@ conf767 3.9824491999 0 83.034165925 0.6987511125000054 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf768 2.26810900867 0 83.441667 0.45833300000000465 @@ -66811,8 +66811,8 @@ conf768 2.26810900867 0 83.441667 0.45833300000000465 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf769 3.33442766268 0 83.4108334 0.4891665999999987 @@ -66899,7 +66899,7 @@ conf769 3.33442766268 0 83.4108334 0.4891665999999987 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf770 3.56238424705 0 83.114167025 0.5787494624999994 @@ -66986,7 +66986,7 @@ conf770 3.56238424705 0 83.114167025 0.5787494624999994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf771 2.20628386166 0 83.8049997 0.09500030000000381 @@ -67072,8 +67072,8 @@ conf771 2.20628386166 0 83.8049997 0.09500030000000381 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf772 2.96054190359 0 83.446667125 0.4533328749999953 @@ -67160,7 +67160,7 @@ conf772 2.96054190359 0 83.446667125 0.4533328749999953 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf773 3.73646781666 0 83.210832725 0.689167274999997 @@ -67247,7 +67247,7 @@ conf773 3.73646781666 0 83.210832725 0.689167274999997 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf774 3.45655139427 0 83.3616669 0.538333099999997 @@ -67334,7 +67334,7 @@ conf774 3.45655139427 0 83.3616669 0.538333099999997 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf775 3.36781044562 0 83.41416585 0.48583414999999663 @@ -67421,7 +67421,7 @@ conf775 3.36781044562 0 83.41416585 0.48583414999999663 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf776 3.33711158829 0 83.400833525 0.4991664750000041 @@ -67508,7 +67508,7 @@ conf776 3.33711158829 0 83.400833525 0.4991664750000041 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf777 2.47649388334 0 83.294167725 0.6058322750000059 @@ -67594,8 +67594,8 @@ conf777 2.47649388334 0 83.294167725 0.6058322750000059 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf778 3.29795938368 0 83.4608333 0.4391666999999956 @@ -67682,7 +67682,7 @@ conf778 3.29795938368 0 83.4608333 0.4391666999999956 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf779 2.58901289839 0 83.224165925 0.6758340750000059 @@ -67768,8 +67768,8 @@ conf779 2.58901289839 0 83.224165925 0.6758340750000059 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf780 2.48110468365 0 83.166666625 0.5000000624999927 @@ -67855,8 +67855,8 @@ conf780 2.48110468365 0 83.166666625 0.5000000624999927 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf781 3.31743089602 0 83.52416585 0.3758341499999972 @@ -67943,7 +67943,7 @@ conf781 3.31743089602 0 83.52416585 0.3758341499999972 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf782 3.16532803511 0 83.4866665 0.4133335000000017 @@ -68030,7 +68030,7 @@ conf782 3.16532803511 0 83.4866665 0.4133335000000017 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf783 3.01209146756 0 83.39499915 0.5050008499999962 @@ -68117,7 +68117,7 @@ conf783 3.01209146756 0 83.39499915 0.5050008499999962 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf784 2.91506038019 0 83.454166025 0.44583397499999367 @@ -68204,7 +68204,7 @@ conf784 2.91506038019 0 83.454166025 0.44583397499999367 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf785 3.28419816756 0 83.439999375 0.4600006250000007 @@ -68291,7 +68291,7 @@ conf785 3.28419816756 0 83.439999375 0.4600006250000007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf786 3.44231905129 0 83.079999875 0.6300001875000021 @@ -68378,7 +68378,7 @@ conf786 3.44231905129 0 83.079999875 0.6300001875000021 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf787 3.35346139693 0 83.4133336 0.4866663999999986 @@ -68465,7 +68465,7 @@ conf787 3.35346139693 0 83.4133336 0.4866663999999986 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf788 2.5733608853 0 83.475833225 0.42416677499999766 @@ -68552,7 +68552,7 @@ conf788 2.5733608853 0 83.475833225 0.42416677499999766 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf789 3.32302239408 0 83.463333575 0.4366664250000071 @@ -68639,7 +68639,7 @@ conf789 3.32302239408 0 83.463333575 0.4366664250000071 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf790 3.86085671563 0 83.331666975 0.5683330249999955 @@ -68726,7 +68726,7 @@ conf790 3.86085671563 0 83.331666975 0.5683330249999955 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf791 2.87563150959 0 83.081667125 0.6274993125000066 @@ -68813,7 +68813,7 @@ conf791 2.87563150959 0 83.081667125 0.6274993125000066 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf792 2.2667413651 0 83.2266664 0.6733336000000009 @@ -68899,8 +68899,8 @@ conf792 2.2667413651 0 83.2266664 0.6733336000000009 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf793 3.17877719405 0 83.10166685 0.5974997249999987 @@ -68987,7 +68987,7 @@ conf793 3.17877719405 0 83.10166685 0.5974997249999987 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf794 2.05169457405 0 83.546667075 0.353332924999998 @@ -69073,8 +69073,8 @@ conf794 2.05169457405 0 83.546667075 0.353332924999998 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf795 3.92871521165 0 83.153333725 0.519999412500006 @@ -69161,7 +69161,7 @@ conf795 3.92871521165 0 83.153333725 0.519999412500006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf796 2.871597607 0 83.05750035 0.663749475000003 @@ -69248,7 +69248,7 @@ conf796 2.871597607 0 83.05750035 0.663749475000003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf797 3.51071987027 0 83.0066665 0.7400002500000085 @@ -69335,7 +69335,7 @@ conf797 3.51071987027 0 83.0066665 0.7400002500000085 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf798 3.25133864889 0 82.619998725 1.3200019124999969 @@ -69422,7 +69422,7 @@ conf798 3.25133864889 0 82.619998725 1.3200019124999969 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf799 3.02951773989 0 82.507500525 1.4887492125000037 @@ -69509,7 +69509,7 @@ conf799 3.02951773989 0 82.507500525 1.4887492125000037 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf800 3.27334017121 0 83.1308336 0.5537495999999962 @@ -69596,7 +69596,7 @@ conf800 3.27334017121 0 83.1308336 0.5537495999999962 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf801 3.77431743295 0 82.847500075 0.9787498875000011 @@ -69683,7 +69683,7 @@ conf801 3.77431743295 0 82.847500075 0.9787498875000011 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf802 3.15173939185 0 82.54416675 1.4337498749999966 @@ -69770,7 +69770,7 @@ conf802 3.15173939185 0 82.54416675 1.4337498749999966 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf803 3.20342071376 0 82.911666325 0.8825005125000018 @@ -69857,7 +69857,7 @@ conf803 3.20342071376 0 82.911666325 0.8825005125000018 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf804 3.76589200269 0 82.63416685 1.2987497250000004 @@ -69944,7 +69944,7 @@ conf804 3.76589200269 0 82.63416685 1.2987497250000004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf805 3.97450408852 0 82.2916664 1.8125004000000047 @@ -70031,7 +70031,7 @@ conf805 3.97450408852 0 82.2916664 1.8125004000000047 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf806 3.72839205344 0 82.84916615 0.9762507749999969 @@ -70118,7 +70118,7 @@ conf806 3.72839205344 0 82.84916615 0.9762507749999969 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf807 3.83937218502 0 82.662499675 1.2562504874999902 @@ -70205,7 +70205,7 @@ conf807 3.83937218502 0 82.662499675 1.2562504874999902 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf808 3.46318308501 0 82.660000625 1.2599990625000075 @@ -70292,7 +70292,7 @@ conf808 3.46318308501 0 82.660000625 1.2599990625000075 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf809 2.65194478828 0 82.9966669 0.7549996500000091 @@ -70378,8 +70378,8 @@ conf809 2.65194478828 0 82.9966669 0.7549996500000091 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf810 3.81868246747 0 82.676666625 1.2350000625000064 @@ -70466,7 +70466,7 @@ conf810 3.81868246747 0 82.676666625 1.2350000625000064 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf811 2.668045357 0 82.687500775 1.2187488374999944 @@ -70552,8 +70552,8 @@ conf811 2.668045357 0 82.687500775 1.2187488374999944 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf812 3.6406305738 0 83.044166575 0.6837501374999917 @@ -70640,7 +70640,7 @@ conf812 3.6406305738 0 83.044166575 0.6837501374999917 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf813 3.88869175025 0 82.619999925 1.3200001124999972 @@ -70727,7 +70727,7 @@ conf813 3.88869175025 0 82.619999925 1.3200001124999972 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf814 3.31960896231 0 82.582499525 1.3762507124999956 @@ -70814,7 +70814,7 @@ conf814 3.31960896231 0 82.582499525 1.3762507124999956 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf815 3.74455317852 0 83.011666325 0.7325005125000104 @@ -70901,7 +70901,7 @@ conf815 3.74455317852 0 83.011666325 0.7325005125000104 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf816 3.5137137361 0 82.635833375 1.2962499374999936 @@ -70988,7 +70988,7 @@ conf816 3.5137137361 0 82.635833375 1.2962499374999936 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf817 3.47803508789 0 82.671666975 1.2424995375000094 @@ -71075,7 +71075,7 @@ conf817 3.47803508789 0 82.671666975 1.2424995375000094 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf818 3.88869175025 0 82.5324997 1.4512504499999963 @@ -71162,7 +71162,7 @@ conf818 3.88869175025 0 82.5324997 1.4512504499999963 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf819 3.99015746851 0 83.32999975 0.5700002500000011 @@ -71249,7 +71249,7 @@ conf819 3.99015746851 0 83.32999975 0.5700002500000011 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf820 3.47633448052 0 82.70916585 1.1862512249999924 @@ -71336,7 +71336,7 @@ conf820 3.47633448052 0 82.70916585 1.1862512249999924 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf821 3.81672942872 0 82.66249985 1.2562502249999952 @@ -71423,7 +71423,7 @@ conf821 3.81672942872 0 82.66249985 1.2562502249999952 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf822 3.68765396647 0 83.035833175 0.6962502375000099 @@ -71510,7 +71510,7 @@ conf822 3.68765396647 0 83.035833175 0.6962502375000099 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf823 2.62468751826 0 83.85666555 0.04333444999999758 @@ -71596,8 +71596,8 @@ conf823 2.62468751826 0 83.85666555 0.04333444999999758 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf824 3.24530593811 0 82.596666725 1.3549999124999914 @@ -71684,7 +71684,7 @@ conf824 3.24530593811 0 82.596666725 1.3549999124999914 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf825 3.78887649269 0 82.6574997 1.2637504499999963 @@ -71771,7 +71771,7 @@ conf825 3.78887649269 0 82.6574997 1.2637504499999963 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf826 3.27825362995 0 83.098333475 0.602499787499994 @@ -71858,7 +71858,7 @@ conf826 3.27825362995 0 83.098333475 0.602499787499994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf827 4.04464703118 0 82.658332375 1.2625014374999992 @@ -71945,7 +71945,7 @@ conf827 4.04464703118 0 82.658332375 1.2625014374999992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf828 4.0080827723 0 82.967499475 0.7987507875000048 @@ -72032,7 +72032,7 @@ conf828 4.0080827723 0 82.967499475 0.7987507875000048 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf829 3.81868246747 0 82.6308328 1.3037508000000102 @@ -72119,7 +72119,7 @@ conf829 3.81868246747 0 82.6308328 1.3037508000000102 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf830 2.97178100595 0 82.274999925 1.8375001124999955 @@ -72206,7 +72206,7 @@ conf830 2.97178100595 0 82.274999925 1.8375001124999955 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf831 3.6270584119 0 82.60749975 1.3387503749999965 @@ -72293,7 +72293,7 @@ conf831 3.6270584119 0 82.60749975 1.3387503749999965 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf832 4.0570665511 0 82.461666575 1.557500137500007 @@ -72380,7 +72380,7 @@ conf832 4.0570665511 0 82.461666575 1.557500137500007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf833 3.28457771051 0 83.106666975 0.589999537500006 @@ -72467,7 +72467,7 @@ conf833 3.28457771051 0 83.106666975 0.589999537500006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf834 3.82073461682 0 82.628333725 1.3074994124999932 @@ -72554,7 +72554,7 @@ conf834 3.82073461682 0 82.628333725 1.3074994124999932 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf835 2.81694416161 0 82.9158347 0.8762479499999927 @@ -72641,7 +72641,7 @@ conf835 2.81694416161 0 82.9158347 0.8762479499999927 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf836 2.71225929471 0 82.91 0.8850000000000051 @@ -72727,8 +72727,8 @@ conf836 2.71225929471 0 82.91 0.8850000000000051 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf837 3.2770273421 0 82.669999925 1.2450001125000014 @@ -72815,7 +72815,7 @@ conf837 3.2770273421 0 82.669999925 1.2450001125000014 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf838 3.34881572161 0 82.45500065 1.5674990249999965 @@ -72902,7 +72902,7 @@ conf838 3.34881572161 0 82.45500065 1.5674990249999965 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf839 3.95513111625 0 82.567500925 1.3987486124999933 @@ -72989,7 +72989,7 @@ conf839 3.95513111625 0 82.567500925 1.3987486124999933 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf840 3.07936522986 0 82.384166725 1.6737499125 @@ -73076,7 +73076,7 @@ conf840 3.07936522986 0 82.384166725 1.6737499125 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf841 3.50904748658 0 82.551667075 1.4224993875000038 @@ -73163,7 +73163,7 @@ conf841 3.50904748658 0 82.551667075 1.4224993875000038 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf842 3.78763902744 0 82.524167675 1.4637484874999984 @@ -73250,7 +73250,7 @@ conf842 3.78763902744 0 82.524167675 1.4637484874999984 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf843 3.18013008622 0 82.7024998 1.1962503000000027 @@ -73337,7 +73337,7 @@ conf843 3.18013008622 0 82.7024998 1.1962503000000027 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf844 4.03626127129 0 82.5291655 1.4562517499999927 @@ -73424,7 +73424,7 @@ conf844 4.03626127129 0 82.5291655 1.4562517499999927 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf845 2.58905763401 0 82.68250105 1.2262484250000014 @@ -73510,8 +73510,8 @@ conf845 2.58905763401 0 82.68250105 1.2262484250000014 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf846 3.99304048761 0 82.58416755 1.3737486749999945 @@ -73598,7 +73598,7 @@ conf846 3.99304048761 0 82.58416755 1.3737486749999945 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf847 3.99528436929 0 82.64749975 1.2787503750000084 @@ -73685,7 +73685,7 @@ conf847 3.99528436929 0 82.64749975 1.2787503750000084 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf848 3.78763902744 0 82.690000125 1.2149998125000039 @@ -73772,7 +73772,7 @@ conf848 3.78763902744 0 82.690000125 1.2149998125000039 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf849 3.68134508359 0 82.605832975 1.3412505375000023 @@ -73859,7 +73859,7 @@ conf849 3.68134508359 0 82.605832975 1.3412505375000023 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf850 3.9513615958 0 82.637499625 1.2937505624999943 @@ -73946,7 +73946,7 @@ conf850 3.9513615958 0 82.637499625 1.2937505624999943 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf851 3.74332022224 0 81.716666775 2.6749998375000104 @@ -74033,7 +74033,7 @@ conf851 3.74332022224 0 81.716666775 2.6749998375000104 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf852 2.81716153181 0 82.6441663 1.2837505500000077 @@ -74120,7 +74120,7 @@ conf852 2.81716153181 0 82.6441663 1.2837505500000077 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf853 6.16037265169 0 81.943333075 2.3350003875000027 @@ -74207,7 +74207,7 @@ conf853 6.16037265169 0 81.943333075 2.3350003875000027 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf854 3.37055296019 0 81.936666475 2.3450002875000067 @@ -74293,8 +74293,8 @@ conf854 3.37055296019 0 81.936666475 2.3450002875000067 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf855 4.90505757241 0 82.631667175 1.302499237499994 @@ -74381,7 +74381,7 @@ conf855 4.90505757241 0 82.631667175 1.302499237499994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf856 4.91731542308 0 82.125833525 2.0612497124999933 @@ -74468,7 +74468,7 @@ conf856 4.91731542308 0 82.125833525 2.0612497124999933 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf857 5.10882133745 0 82.5716667 1.3924999500000084 @@ -74555,7 +74555,7 @@ conf857 5.10882133745 0 82.5716667 1.3924999500000084 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf858 5.91056159856 0 82.000000275 2.2499995874999925 @@ -74642,7 +74642,7 @@ conf858 5.91056159856 0 82.000000275 2.2499995874999925 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf859 3.59399896186 0 81.861666875 2.457499687500004 @@ -74729,7 +74729,7 @@ conf859 3.59399896186 0 81.861666875 2.457499687500004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf860 3.8438175556 0 81.89499995 2.4075000750000015 @@ -74815,8 +74815,8 @@ conf860 3.8438175556 0 81.89499995 2.4075000750000015 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf861 5.10882133745 0 82.03250025 2.2012496250000027 @@ -74903,7 +74903,7 @@ conf861 5.10882133745 0 82.03250025 2.2012496250000027 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf862 3.24518027232 0 82.024166425 2.2137503624999937 @@ -74990,7 +74990,7 @@ conf862 3.24518027232 0 82.024166425 2.2137503624999937 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf863 4.45286607582 0 82.109999625 2.0850005624999994 @@ -75077,7 +75077,7 @@ conf863 4.45286607582 0 82.109999625 2.0850005624999994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf864 2.66745181515 0 81.925832975 2.3612505374999913 @@ -75164,7 +75164,7 @@ conf864 2.66745181515 0 81.925832975 2.3612505374999913 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf865 3.8438175556 0 81.93416575 2.348751374999992 @@ -75251,7 +75251,7 @@ conf865 3.8438175556 0 81.93416575 2.348751374999992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf866 2.75141950611 0 83.235000425 0.6649995750000045 @@ -75337,8 +75337,8 @@ conf866 2.75141950611 0 83.235000425 0.6649995750000045 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf867 5.02396619108 0 81.964998375 2.30250243750001 @@ -75425,7 +75425,7 @@ conf867 5.02396619108 0 81.964998375 2.30250243750001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf868 6.30850043881 0 81.839167875 2.4912481874999983 @@ -75512,7 +75512,7 @@ conf868 6.30850043881 0 81.839167875 2.4912481874999983 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf869 2.71089204705 0 82.777500525 1.0837492125000097 @@ -75598,8 +75598,8 @@ conf869 2.71089204705 0 82.777500525 1.0837492125000097 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf870 3.37792709756 0 82.785001075 1.072498387500005 @@ -75686,7 +75686,7 @@ conf870 3.37792709756 0 82.785001075 1.072498387500005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf871 6.20896956368 0 81.805832675 2.5412509874999927 @@ -75773,7 +75773,7 @@ conf871 6.20896956368 0 81.805832675 2.5412509874999927 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf872 6.093601141 0 81.809166125 2.5362508124999934 @@ -75860,7 +75860,7 @@ conf872 6.093601141 0 81.809166125 2.5362508124999934 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf873 6.17472027945 0 81.960000325 2.3099995125000063 @@ -75947,7 +75947,7 @@ conf873 6.17472027945 0 81.960000325 2.3099995125000063 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf874 4.37093352457 0 82.7258336 1.1612495999999979 @@ -76034,7 +76034,7 @@ conf874 4.37093352457 0 82.7258336 1.1612495999999979 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf875 5.7215125746 0 81.9375002 2.3437496999999965 @@ -76121,7 +76121,7 @@ conf875 5.7215125746 0 81.9375002 2.3437496999999965 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf876 4.84163016407 0 82.084999725 2.1225004124999955 @@ -76208,7 +76208,7 @@ conf876 4.84163016407 0 82.084999725 2.1225004124999955 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf877 6.26209688761 0 81.819166775 2.521249837500001 @@ -76295,7 +76295,7 @@ conf877 6.26209688761 0 81.819166775 2.521249837500001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf878 4.85663126202 0 82.5866659 1.3700011500000002 @@ -76382,7 +76382,7 @@ conf878 4.85663126202 0 82.5866659 1.3700011500000002 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf879 6.20896956368 0 81.853333725 2.4699994125000018 @@ -76469,7 +76469,7 @@ conf879 6.20896956368 0 81.853333725 2.4699994125000018 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf880 3.15632103776 0 83.2983335 0.6016665000000018 @@ -76556,7 +76556,7 @@ conf880 3.15632103776 0 83.2983335 0.6016665000000018 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf881 3.80890289184 0 81.777500975 2.583748537500007 @@ -76643,7 +76643,7 @@ conf881 3.80890289184 0 81.777500975 2.583748537500007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf882 4.31381299305 0 83.01000025 0.7349996249999933 @@ -76730,7 +76730,7 @@ conf882 4.31381299305 0 83.01000025 0.7349996249999933 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf883 3.79575894348 0 82.3091669 1.7862496500000091 @@ -76817,7 +76817,7 @@ conf883 3.79575894348 0 82.3091669 1.7862496500000091 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf884 6.01904155181 0 81.837499775 2.4937503375000034 @@ -76904,7 +76904,7 @@ conf884 6.01904155181 0 81.837499775 2.4937503375000034 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf885 3.09816021067 0 82.4133333 1.6300000499999925 @@ -76991,7 +76991,7 @@ conf885 3.09816021067 0 82.4133333 1.6300000499999925 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf886 2.53423568639 0 82.0749991 2.137501350000001 @@ -77078,7 +77078,7 @@ conf886 2.53423568639 0 82.0749991 2.137501350000001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf887 3.92665454118 0 81.950833375 2.323749937499997 @@ -77165,7 +77165,7 @@ conf887 3.92665454118 0 81.950833375 2.323749937499997 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf888 5.43694808897 0 81.838333275 2.492500087500005 @@ -77252,7 +77252,7 @@ conf888 5.43694808897 0 81.838333275 2.492500087500005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf889 5.43694808897 0 81.915832175 2.3762517374999916 @@ -77339,7 +77339,7 @@ conf889 5.43694808897 0 81.915832175 2.3762517374999916 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf890 3.72936814517 0 83.34833285 0.5516671499999944 @@ -77426,7 +77426,7 @@ conf890 3.72936814517 0 83.34833285 0.5516671499999944 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf891 3.50391619891 0 83.436665475 0.46333452500000194 @@ -77513,7 +77513,7 @@ conf891 3.50391619891 0 83.436665475 0.46333452500000194 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf892 3.31248767312 0 83.2858336 0.6141663999999963 @@ -77600,7 +77600,7 @@ conf892 3.31248767312 0 83.2858336 0.6141663999999963 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf893 3.7499123954 0 83.4508327 0.4491672999999935 @@ -77687,7 +77687,7 @@ conf893 3.7499123954 0 83.4508327 0.4491672999999935 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf894 3.44261287135 0 83.47166675 0.4283332500000029 @@ -77774,7 +77774,7 @@ conf894 3.44261287135 0 83.47166675 0.4283332500000029 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf895 3.69707003451 0 83.4391661 0.4608339000000058 @@ -77861,7 +77861,7 @@ conf895 3.69707003451 0 83.4391661 0.4608339000000058 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf896 3.51958469414 0 83.454166025 0.44583397499999367 @@ -77948,7 +77948,7 @@ conf896 3.51958469414 0 83.454166025 0.44583397499999367 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf897 3.3941522599 0 83.4208333 0.47916670000000183 @@ -78035,7 +78035,7 @@ conf897 3.3941522599 0 83.4208333 0.47916670000000183 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf898 2.50486697002 0 83.303333425 0.5966665749999948 @@ -78121,8 +78121,8 @@ conf898 2.50486697002 0 83.303333425 0.5966665749999948 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf899 3.69734139746 0 83.716666025 0.18333397500000503 @@ -78209,7 +78209,7 @@ conf899 3.69734139746 0 83.716666025 0.18333397500000503 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf900 3.53539394872 0 83.4691667 0.43083329999999764 @@ -78296,7 +78296,7 @@ conf900 3.53539394872 0 83.4691667 0.43083329999999764 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf901 2.81481595232 0 83.30999995 0.5900000499999948 @@ -78383,7 +78383,7 @@ conf901 2.81481595232 0 83.30999995 0.5900000499999948 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf902 3.59517889854 0 83.304999975 0.5950000249999988 @@ -78470,7 +78470,7 @@ conf902 3.59517889854 0 83.304999975 0.5950000249999988 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf903 2.7921225095 0 83.417500225 0.4824997750000023 @@ -78557,7 +78557,7 @@ conf903 2.7921225095 0 83.417500225 0.4824997750000023 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf904 3.63331458464 0 83.449166125 0.45083387499999505 @@ -78644,7 +78644,7 @@ conf904 3.63331458464 0 83.449166125 0.45083387499999505 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf905 3.47299403328 0 83.55583285 0.3441671499999984 @@ -78731,7 +78731,7 @@ conf905 3.47299403328 0 83.55583285 0.3441671499999984 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf906 3.44718329478 0 83.274167675 0.625832324999999 @@ -78818,7 +78818,7 @@ conf906 3.44718329478 0 83.274167675 0.625832324999999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf907 3.68820488322 0 83.474166075 0.4258339250000006 @@ -78905,7 +78905,7 @@ conf907 3.68820488322 0 83.474166075 0.4258339250000006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf908 3.54345165631 0 83.48249965 0.41750035000000596 @@ -78992,7 +78992,7 @@ conf908 3.54345165631 0 83.48249965 0.41750035000000596 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf909 3.71451789948 0 83.427500375 0.4724996250000061 @@ -79079,7 +79079,7 @@ conf909 3.71451789948 0 83.427500375 0.4724996250000061 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf910 3.41501347545 0 83.6241676 0.2758323999999931 @@ -79166,7 +79166,7 @@ conf910 3.41501347545 0 83.6241676 0.2758323999999931 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf911 2.35625127316 0 83.7183326 0.18166740000000348 @@ -79252,8 +79252,8 @@ conf911 2.35625127316 0 83.7183326 0.18166740000000348 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf912 3.11476765562 0 83.430000525 0.46999947500000305 @@ -79340,7 +79340,7 @@ conf912 3.11476765562 0 83.430000525 0.46999947500000305 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf913 2.54844128088 0 83.651667225 0.24833277500000295 @@ -79426,8 +79426,8 @@ conf913 2.54844128088 0 83.651667225 0.24833277500000295 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf914 3.47560954513 0 83.185000275 0.4724995875000104 @@ -79514,7 +79514,7 @@ conf914 3.47560954513 0 83.185000275 0.4724995875000104 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf915 2.47649388334 0 83.309999775 0.5900002250000057 @@ -79600,8 +79600,8 @@ conf915 2.47649388334 0 83.309999775 0.5900002250000057 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf916 3.63349441695 0 83.54916695 0.35083305 @@ -79688,7 +79688,7 @@ conf916 3.63349441695 0 83.54916695 0.35083305 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf917 3.30388414795 0 83.359167275 0.5408327249999957 @@ -79775,7 +79775,7 @@ conf917 3.30388414795 0 83.359167275 0.5408327249999957 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf918 3.77986574157 0 83.4441673 0.4558326999999963 @@ -79862,7 +79862,7 @@ conf918 3.77986574157 0 83.4441673 0.4558326999999963 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf919 3.67920746343 0 83.2683346 0.6316653999999972 @@ -79949,7 +79949,7 @@ conf919 3.67920746343 0 83.2683346 0.6316653999999972 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf920 2.70850046994 0 83.616666525 0.2833334750000006 @@ -80035,5 +80035,5 @@ conf920 2.70850046994 0 83.616666525 0.2833334750000006 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_single.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_single.txt index 457afdf647..bf55690f22 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_single.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_single.txt @@ -170,7 +170,7 @@ conf1 3.42331509807 0 83.145832875 0.5312506875000054 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf2 4.1366775368 0 83.3483332 0.551666800000001 @@ -257,7 +257,7 @@ conf2 4.1366775368 0 83.3483332 0.551666800000001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf3 3.90759061734 0 83.230833025 0.6691669750000045 @@ -344,7 +344,7 @@ conf3 3.90759061734 0 83.230833025 0.6691669750000045 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf4 4.06315650642 0 83.361666275 0.5383337249999954 @@ -431,7 +431,7 @@ conf4 4.06315650642 0 83.361666275 0.5383337249999954 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf5 2.4166360786 0 83.281667125 0.6183328750000016 @@ -518,7 +518,7 @@ conf5 2.4166360786 0 83.281667125 0.6183328750000016 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf6 3.63411042957 0 83.1183345 0.5724982499999953 @@ -605,7 +605,7 @@ conf6 3.63411042957 0 83.1183345 0.5724982499999953 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf7 3.91784790592 0 83.264166875 0.6358331249999992 @@ -692,7 +692,7 @@ conf7 3.91784790592 0 83.264166875 0.6358331249999992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf8 3.96269177469 0 83.341666525 0.5583334750000063 @@ -779,7 +779,7 @@ conf8 3.96269177469 0 83.341666525 0.5583334750000063 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf9 2.55274654709 0 83.17749965 0.4837505249999978 @@ -866,7 +866,7 @@ conf9 2.55274654709 0 83.17749965 0.4837505249999978 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf10 3.89210199315 0 83.06833345 0.6474998250000041 @@ -953,7 +953,7 @@ conf10 3.89210199315 0 83.06833345 0.6474998250000041 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf11 3.34244287421 0 83.62333335 0.27666665000000423 @@ -1040,7 +1040,7 @@ conf11 3.34244287421 0 83.62333335 0.27666665000000423 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf12 3.72902304785 0 83.500832325 0.3991676749999954 @@ -1127,7 +1127,7 @@ conf12 3.72902304785 0 83.500832325 0.3991676749999954 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf13 4.13863177846 0 83.1449996 0.5325005999999917 @@ -1214,7 +1214,7 @@ conf13 4.13863177846 0 83.1449996 0.5325005999999917 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf14 3.70143021825 0 83.326666825 0.5733331749999963 @@ -1301,7 +1301,7 @@ conf14 3.70143021825 0 83.326666825 0.5733331749999963 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf15 4.03977337841 0 83.367499925 0.5325000749999959 @@ -1388,7 +1388,7 @@ conf15 4.03977337841 0 83.367499925 0.5325000749999959 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf16 3.66416137792 0 83.295833075 0.6041669249999956 @@ -1475,7 +1475,7 @@ conf16 3.66416137792 0 83.295833075 0.6041669249999956 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf17 2.73203359573 0 83.29333355 0.6066664500000002 @@ -1562,7 +1562,7 @@ conf17 2.73203359573 0 83.29333355 0.6066664500000002 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf18 2.73838278798 0 83.118333625 0.572499562499992 @@ -1649,7 +1649,7 @@ conf18 2.73838278798 0 83.118333625 0.572499562499992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf19 4.12989451981 0 83.114166825 0.5787497625000029 @@ -1736,7 +1736,7 @@ conf19 4.12989451981 0 83.114166825 0.5787497625000029 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf20 2.74356561405 0 83.51749975 0.3825002500000011 @@ -1823,7 +1823,7 @@ conf20 2.74356561405 0 83.51749975 0.3825002500000011 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf21 2.76530651905 0 83.3750001 0.524999900000006 @@ -1910,7 +1910,7 @@ conf21 2.76530651905 0 83.3750001 0.524999900000006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf22 3.10776321973 0 83.51166675 0.38833324999999663 @@ -1997,7 +1997,7 @@ conf22 3.10776321973 0 83.51166675 0.38833324999999663 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf23 4.1076173021 0 83.10333365 0.5949995250000057 @@ -2084,7 +2084,7 @@ conf23 4.1076173021 0 83.10333365 0.5949995250000057 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf24 4.11895311755 0 83.202500325 0.6974996749999974 @@ -2171,7 +2171,7 @@ conf24 4.11895311755 0 83.202500325 0.6974996749999974 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf25 3.59777234653 0 83.3741667 0.5258332999999965 @@ -2258,7 +2258,7 @@ conf25 3.59777234653 0 83.3741667 0.5258332999999965 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf26 3.41043775226 0 83.024166725 0.7137499124999991 @@ -2345,7 +2345,7 @@ conf26 3.41043775226 0 83.024166725 0.7137499124999991 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf27 3.68082052931 0 83.4191672 0.48083279999999606 @@ -2432,7 +2432,7 @@ conf27 3.68082052931 0 83.4191672 0.48083279999999606 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf28 3.69150234657 0 83.174999575 0.4875006374999984 @@ -2519,7 +2519,7 @@ conf28 3.69150234657 0 83.174999575 0.4875006374999984 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf29 2.86573918703 0 83.597500175 0.3024998250000067 @@ -2606,7 +2606,7 @@ conf29 2.86573918703 0 83.597500175 0.3024998250000067 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf30 3.87625835913 0 83.178333525 0.482499712500001 @@ -2693,7 +2693,7 @@ conf30 3.87625835913 0 83.178333525 0.482499712500001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf31 4.09035639381 0 83.2525001 0.647499899999994 @@ -2780,7 +2780,7 @@ conf31 4.09035639381 0 83.2525001 0.647499899999994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf32 3.87939285722 0 83.3758335 0.5241665000000012 @@ -2867,7 +2867,7 @@ conf32 3.87939285722 0 83.3758335 0.5241665000000012 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf33 3.96976512759 0 83.368333725 0.5316662750000006 @@ -2954,7 +2954,7 @@ conf33 3.96976512759 0 83.368333725 0.5316662750000006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf34 3.13322137457 0 83.023333725 0.7149994124999992 @@ -3041,7 +3041,7 @@ conf34 3.13322137457 0 83.023333725 0.7149994124999992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf35 4.08700450168 0 83.1675001 0.49874985000000294 @@ -3128,7 +3128,7 @@ conf35 4.08700450168 0 83.1675001 0.49874985000000294 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf36 3.99607976488 0 83.600831925 0.29916807500000575 @@ -3215,7 +3215,7 @@ conf36 3.99607976488 0 83.600831925 0.29916807500000575 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf37 3.7916732297 0 83.1049997 0.59250045000001 @@ -3302,7 +3302,7 @@ conf37 3.7916732297 0 83.1049997 0.59250045000001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf38 2.59232682658 0 83.86500035 0.034999649999997495 @@ -3389,7 +3389,7 @@ conf38 2.59232682658 0 83.86500035 0.034999649999997495 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf39 4.16703365446 0 83.2283323 0.6716676999999948 @@ -3476,7 +3476,7 @@ conf39 4.16703365446 0 83.2283323 0.6716676999999948 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf40 4.15167888321 0 83.116667925 0.5749981124999977 @@ -3563,7 +3563,7 @@ conf40 4.15167888321 0 83.116667925 0.5749981124999977 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf41 4.09821993905 0 83.221666325 0.678333674999999 @@ -3650,7 +3650,7 @@ conf41 4.09821993905 0 83.221666325 0.678333674999999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf42 3.59717623958 0 83.3591672 0.5408327999999983 @@ -3737,7 +3737,7 @@ conf42 3.59717623958 0 83.3591672 0.5408327999999983 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf43 3.91417949819 0 83.1383341 0.5424988500000083 @@ -3824,7 +3824,7 @@ conf43 3.91417949819 0 83.1383341 0.5424988500000083 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf44 2.63166503619 0 83.59166585 0.30833415000000175 @@ -3910,8 +3910,8 @@ conf44 2.63166503619 0 83.59166585 0.30833415000000175 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf45 4.07353955232 0 83.4000008 0.4999991999999992 @@ -3998,7 +3998,7 @@ conf45 4.07353955232 0 83.4000008 0.4999991999999992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf46 4.00603970123 0 83.269166375 0.6308336250000025 @@ -4085,7 +4085,7 @@ conf46 4.00603970123 0 83.269166375 0.6308336250000025 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf47 4.10554485592 0 83.44 0.4600000000000023 @@ -4172,7 +4172,7 @@ conf47 4.10554485592 0 83.44 0.4600000000000023 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf48 3.89291700398 0 83.5358329 0.3641670999999974 @@ -4259,7 +4259,7 @@ conf48 3.89291700398 0 83.5358329 0.3641670999999974 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf49 2.87571062424 0 83.66583255 0.2341674499999954 @@ -4346,7 +4346,7 @@ conf49 2.87571062424 0 83.66583255 0.2341674499999954 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf50 2.92776678825 0 83.169166925 0.49624961250000155 @@ -4432,8 +4432,8 @@ conf50 2.92776678825 0 83.169166925 0.49624961250000155 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf51 3.82237267657 0 83.392500375 0.5074996250000027 @@ -4520,7 +4520,7 @@ conf51 3.82237267657 0 83.392500375 0.5074996250000027 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf52 3.45538529072 0 83.7458343 0.15416570000000152 @@ -4607,7 +4607,7 @@ conf52 3.45538529072 0 83.7458343 0.15416570000000152 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf53 2.96953466999 0 83.765000175 0.13499982500000274 @@ -4694,7 +4694,7 @@ conf53 2.96953466999 0 83.765000175 0.13499982500000274 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf54 2.43878058591 0 83.0391674 0.691248900000005 @@ -4781,7 +4781,7 @@ conf54 2.43878058591 0 83.0391674 0.691248900000005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf55 4.07628888308 0 83.183333275 0.4750000875000069 @@ -4868,7 +4868,7 @@ conf55 4.07628888308 0 83.183333275 0.4750000875000069 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf56 3.41015160804 0 83.22333395 0.6766660500000029 @@ -4955,7 +4955,7 @@ conf56 3.41015160804 0 83.22333395 0.6766660500000029 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf57 3.79073162281 0 83.43416615 0.4658338500000042 @@ -5042,7 +5042,7 @@ conf57 3.79073162281 0 83.43416615 0.4658338500000042 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf58 2.47370113984 0 83.025833375 0.7112499374999928 @@ -5129,7 +5129,7 @@ conf58 2.47370113984 0 83.025833375 0.7112499374999928 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf59 2.94113112878 0 83.20000055 0.6999994500000014 @@ -5215,8 +5215,8 @@ conf59 2.94113112878 0 83.20000055 0.6999994500000014 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf60 4.16196961351 0 83.16083385 0.5087492249999954 @@ -5303,7 +5303,7 @@ conf60 4.16196961351 0 83.16083385 0.5087492249999954 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf61 3.17265163759 0 83.2299996 0.6700004000000007 @@ -5389,8 +5389,8 @@ conf61 3.17265163759 0 83.2299996 0.6700004000000007 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf62 3.4099831354 0 83.32749995 0.5725000499999965 @@ -5477,7 +5477,7 @@ conf62 3.4099831354 0 83.32749995 0.5725000499999965 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf63 2.39276147412 0 83.225833475 0.6741665249999983 @@ -5564,7 +5564,7 @@ conf63 2.39276147412 0 83.225833475 0.6741665249999983 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf64 2.67074720888 0 83.5775003 0.32249970000000305 @@ -5650,8 +5650,8 @@ conf64 2.67074720888 0 83.5775003 0.32249970000000305 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf65 3.97519411144 0 83.197499875 0.45375018749999185 @@ -5738,7 +5738,7 @@ conf65 3.97519411144 0 83.197499875 0.45375018749999185 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf66 3.76312088817 0 83.3708336 0.5291664000000026 @@ -5825,7 +5825,7 @@ conf66 3.76312088817 0 83.3708336 0.5291664000000026 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf67 2.9437987388 0 83.13249965 0.5512505250000004 @@ -5912,7 +5912,7 @@ conf67 2.9437987388 0 83.13249965 0.5512505250000004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf68 3.87876555411 0 83.5975005 0.3024995000000047 @@ -5999,7 +5999,7 @@ conf68 3.87876555411 0 83.5975005 0.3024995000000047 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf69 3.85398606439 0 83.07749975 0.6337503749999982 @@ -6086,7 +6086,7 @@ conf69 3.85398606439 0 83.07749975 0.6337503749999982 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf70 3.58784253707 0 83.54999975 0.35000025000000223 @@ -6173,7 +6173,7 @@ conf70 3.58784253707 0 83.54999975 0.35000025000000223 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf71 2.95980019108 0 83.6841669 0.2158331000000061 @@ -6260,7 +6260,7 @@ conf71 2.95980019108 0 83.6841669 0.2158331000000061 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf72 3.75261917668 0 83.514166775 0.3858332249999933 @@ -6347,7 +6347,7 @@ conf72 3.75261917668 0 83.514166775 0.3858332249999933 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf73 2.71776955648 0 83.418334 0.4816659999999985 @@ -6433,8 +6433,8 @@ conf73 2.71776955648 0 83.418334 0.4816659999999985 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf74 3.52191903276 0 83.51499955 0.38500045000000116 @@ -6521,7 +6521,7 @@ conf74 3.52191903276 0 83.51499955 0.38500045000000116 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf75 3.84413336023 0 83.466666475 0.43333352500000333 @@ -6608,7 +6608,7 @@ conf75 3.84413336023 0 83.466666475 0.43333352500000333 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf76 3.87298002921 0 83.2308336 0.6691664000000032 @@ -6695,7 +6695,7 @@ conf76 3.87298002921 0 83.2308336 0.6691664000000032 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf77 2.74417093221 0 83.5499997 0.35000029999999926 @@ -6782,7 +6782,7 @@ conf77 2.74417093221 0 83.5499997 0.35000029999999926 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf78 2.22642162255 0 83.495000025 0.40499997500000406 @@ -6869,7 +6869,7 @@ conf78 2.22642162255 0 83.495000025 0.40499997500000406 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf79 3.92399759934 0 83.24166705 0.6583329499999963 @@ -6956,7 +6956,7 @@ conf79 3.92399759934 0 83.24166705 0.6583329499999963 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf80 2.45943734525 0 83.33916645 0.5608335500000067 @@ -7043,7 +7043,7 @@ conf80 2.45943734525 0 83.33916645 0.5608335500000067 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf81 4.00384262814 0 83.110000625 0.5849990625000032 @@ -7130,7 +7130,7 @@ conf81 4.00384262814 0 83.110000625 0.5849990625000032 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf82 3.32027935074 0 83.276666275 0.6233337250000034 @@ -7217,7 +7217,7 @@ conf82 3.32027935074 0 83.276666275 0.6233337250000034 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf83 2.94317954081 0 83.1358333 0.5462500499999976 @@ -7304,7 +7304,7 @@ conf83 2.94317954081 0 83.1358333 0.5462500499999976 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf84 4.22080428353 0 83.1049998 0.5925002999999975 @@ -7391,7 +7391,7 @@ conf84 4.22080428353 0 83.1049998 0.5925002999999975 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf85 3.81479129254 0 83.557500125 0.3424998749999958 @@ -7478,7 +7478,7 @@ conf85 3.81479129254 0 83.557500125 0.3424998749999958 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf86 2.9172205322 0 83.144165775 0.5337513374999929 @@ -7565,7 +7565,7 @@ conf86 2.9172205322 0 83.144165775 0.5337513374999929 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf87 3.06829448351 0 83.550833125 0.3491668750000031 @@ -7652,7 +7652,7 @@ conf87 3.06829448351 0 83.550833125 0.3491668750000031 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf88 2.84013960326 0 83.1766672 0.4849992000000043 @@ -7739,7 +7739,7 @@ conf88 2.84013960326 0 83.1766672 0.4849992000000043 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf89 4.1736078315 0 83.42416665 0.4758333499999964 @@ -7826,7 +7826,7 @@ conf89 4.1736078315 0 83.42416665 0.4758333499999964 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf90 3.42983465445 0 83.566666575 0.33333342500000074 @@ -7913,7 +7913,7 @@ conf90 3.42983465445 0 83.566666575 0.33333342500000074 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf91 2.92875916232 0 83.24749985 0.652500150000003 @@ -8000,7 +8000,7 @@ conf91 2.92875916232 0 83.24749985 0.652500150000003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf92 2.76054495524 0 83.36583415 0.5341658500000023 @@ -8086,8 +8086,8 @@ conf92 2.76054495524 0 83.36583415 0.5341658500000023 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf93 3.90020836464 0 83.2075002 0.6924998000000017 @@ -8174,7 +8174,7 @@ conf93 3.90020836464 0 83.2075002 0.6924998000000017 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf94 2.79957019355 0 83.1749994 0.48750089999999346 @@ -8261,7 +8261,7 @@ conf94 2.79957019355 0 83.1749994 0.48750089999999346 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf95 3.41791929541 0 83.6141671 0.28583289999999695 @@ -8348,7 +8348,7 @@ conf95 3.41791929541 0 83.6141671 0.28583289999999695 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf96 3.28020464775 0 83.451667225 0.4483327750000058 @@ -8435,7 +8435,7 @@ conf96 3.28020464775 0 83.451667225 0.4483327750000058 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf97 2.93087218523 0 83.336666175 0.5633338249999952 @@ -8521,8 +8521,8 @@ conf97 2.93087218523 0 83.336666175 0.5633338249999952 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf98 3.69195682878 0 83.13499955 0.5475006749999949 @@ -8609,7 +8609,7 @@ conf98 3.69195682878 0 83.13499955 0.5475006749999949 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf99 3.66966788429 0 83.910833175 0.2891668250000066 @@ -8696,7 +8696,7 @@ conf99 3.66966788429 0 83.910833175 0.2891668250000066 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf100 3.99355810444 0 83.09333335 0.609999975000008 @@ -8783,7 +8783,7 @@ conf100 3.99355810444 0 83.09333335 0.609999975000008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf101 3.91059385107 0 83.439998825 0.4600011749999965 @@ -8870,7 +8870,7 @@ conf101 3.91059385107 0 83.439998825 0.4600011749999965 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf102 2.89436719121 0 83.3733338 0.5266662000000025 @@ -8957,7 +8957,7 @@ conf102 2.89436719121 0 83.3733338 0.5266662000000025 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf103 3.96901443961 0 83.470000325 0.42999967499999914 @@ -9044,7 +9044,7 @@ conf103 3.96901443961 0 83.470000325 0.42999967499999914 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf104 3.63707016725 0 83.3091667 0.5908332999999942 @@ -9131,7 +9131,7 @@ conf104 3.63707016725 0 83.3091667 0.5908332999999942 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf105 4.02729338977 0 83.60833365 0.2916663499999942 @@ -9218,7 +9218,7 @@ conf105 4.02729338977 0 83.60833365 0.2916663499999942 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf106 2.99981298607 0 83.136667675 0.5449984875000027 @@ -9305,7 +9305,7 @@ conf106 2.99981298607 0 83.136667675 0.5449984875000027 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf107 2.98635289542 0 83.2550009 0.6449991000000012 @@ -9392,7 +9392,7 @@ conf107 2.98635289542 0 83.2550009 0.6449991000000012 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf108 3.16355414467 0 83.122499825 0.5662502624999917 @@ -9479,7 +9479,7 @@ conf108 3.16355414467 0 83.122499825 0.5662502624999917 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf109 3.54139300066 0 83.6316658 0.26833420000000674 @@ -9566,7 +9566,7 @@ conf109 3.54139300066 0 83.6316658 0.26833420000000674 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf110 3.85667371908 0 83.205000675 0.6949993250000063 @@ -9653,7 +9653,7 @@ conf110 3.85667371908 0 83.205000675 0.6949993250000063 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf111 3.47226260134 0 83.0608333 0.6587500500000019 @@ -9740,7 +9740,7 @@ conf111 3.47226260134 0 83.0608333 0.6587500500000019 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf112 2.40820216321 0 83.505000925 0.39499907499999554 @@ -9826,8 +9826,8 @@ conf112 2.40820216321 0 83.505000925 0.39499907499999554 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf113 3.8971155278 0 83.515833875 0.3841661250000016 @@ -9914,7 +9914,7 @@ conf113 3.8971155278 0 83.515833875 0.3841661250000016 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf114 2.76115778978 0 83.350832975 0.549167024999997 @@ -10000,8 +10000,8 @@ conf114 2.76115778978 0 83.350832975 0.549167024999997 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf115 3.84548578414 0 83.4325001 0.4674999000000014 @@ -10088,7 +10088,7 @@ conf115 3.84548578414 0 83.4325001 0.4674999000000014 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf116 3.81440255245 0 83.5316665 0.3683335 @@ -10175,7 +10175,7 @@ conf116 3.81440255245 0 83.5316665 0.3683335 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf117 3.46297488668 0 83.1558327 0.5162509499999928 @@ -10262,7 +10262,7 @@ conf117 3.46297488668 0 83.1558327 0.5162509499999928 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf118 3.05162390916 0 83.309999675 0.5900003249999998 @@ -10349,7 +10349,7 @@ conf118 3.05162390916 0 83.309999675 0.5900003249999998 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf119 4.00575298585 0 83.2000002 0.6999997999999948 @@ -10436,7 +10436,7 @@ conf119 4.00575298585 0 83.2000002 0.6999997999999948 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf120 3.9597325188 0 83.2366666 0.6633333999999934 @@ -10523,7 +10523,7 @@ conf120 3.9597325188 0 83.2366666 0.6633333999999934 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf121 4.16848230926 0 83.279166775 0.6208332250000069 @@ -10610,7 +10610,7 @@ conf121 4.16848230926 0 83.279166775 0.6208332250000069 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf122 3.15613106065 0 83.22666675 0.6733332499999932 @@ -10697,7 +10697,7 @@ conf122 3.15613106065 0 83.22666675 0.6733332499999932 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf123 2.96989395327 0 83.26250035 0.6374996500000037 @@ -10784,7 +10784,7 @@ conf123 2.96989395327 0 83.26250035 0.6374996500000037 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf124 4.17369428155 0 83.069166175 0.6462507374999902 @@ -10871,7 +10871,7 @@ conf124 4.17369428155 0 83.069166175 0.6462507374999902 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf125 4.1151675669 0 83.196665925 0.45500111249999264 @@ -10958,7 +10958,7 @@ conf125 4.1151675669 0 83.196665925 0.45500111249999264 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf126 2.77979520157 0 83.243333675 0.6566663249999977 @@ -11045,7 +11045,7 @@ conf126 2.77979520157 0 83.243333675 0.6566663249999977 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf127 3.77711651301 0 83.195832775 0.4562508375000007 @@ -11132,7 +11132,7 @@ conf127 3.77711651301 0 83.195832775 0.4562508375000007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf128 3.04665804457 0 83.634999925 0.26500007499999756 @@ -11219,7 +11219,7 @@ conf128 3.04665804457 0 83.634999925 0.26500007499999756 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf129 4.03846169157 0 83.2958318 0.604168199999998 @@ -11306,7 +11306,7 @@ conf129 4.03846169157 0 83.2958318 0.604168199999998 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf130 3.62000768828 0 83.2675 0.6325000000000017 @@ -11393,7 +11393,7 @@ conf130 3.62000768828 0 83.2675 0.6325000000000017 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf131 3.96091867263 0 83.47499925 0.4250007500000038 @@ -11480,7 +11480,7 @@ conf131 3.96091867263 0 83.47499925 0.4250007500000038 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf132 2.848577387 0 83.531666975 0.36833302500000686 @@ -11567,7 +11567,7 @@ conf132 2.848577387 0 83.531666975 0.36833302500000686 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf133 3.92995199665 0 83.17916715 0.48124927500000325 @@ -11654,7 +11654,7 @@ conf133 3.92995199665 0 83.17916715 0.48124927500000325 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf134 3.38769675285 0 83.177499525 0.48375071249999735 @@ -11741,7 +11741,7 @@ conf134 3.38769675285 0 83.177499525 0.48375071249999735 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf135 3.83796805888 0 83.202499775 0.6975002249999932 @@ -11828,7 +11828,7 @@ conf135 3.83796805888 0 83.202499775 0.6975002249999932 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf136 3.33368324801 0 83.794999625 0.10500037499999737 @@ -11915,7 +11915,7 @@ conf136 3.33368324801 0 83.794999625 0.10500037499999737 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf137 2.50920843799 0 83.1599993 0.5100010500000067 @@ -12001,8 +12001,8 @@ conf137 2.50920843799 0 83.1599993 0.5100010500000067 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf138 3.29812270966 0 83.3425008 0.5574992000000037 @@ -12089,7 +12089,7 @@ conf138 3.29812270966 0 83.3425008 0.5574992000000037 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf139 4.10143003409 0 83.327500725 0.572499275000007 @@ -12176,7 +12176,7 @@ conf139 4.10143003409 0 83.327500725 0.572499275000007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf140 3.91370812684 0 83.2191679 0.6808320999999978 @@ -12263,7 +12263,7 @@ conf140 3.91370812684 0 83.2191679 0.6808320999999978 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf141 3.92871064599 0 83.2091665 0.6908335000000051 @@ -12350,7 +12350,7 @@ conf141 3.92871064599 0 83.2091665 0.6908335000000051 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf142 4.65500523621 0 83.227499125 0.6725008750000058 @@ -12437,7 +12437,7 @@ conf142 4.65500523621 0 83.227499125 0.6725008750000058 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf143 2.40865701183 0 83.40000005 0.4999999499999973 @@ -12524,7 +12524,7 @@ conf143 2.40865701183 0 83.40000005 0.4999999499999973 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf144 3.81105496194 0 83.3700005 0.5299994999999967 @@ -12611,7 +12611,7 @@ conf144 3.81105496194 0 83.3700005 0.5299994999999967 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf145 2.5457477248 0 83.4350002 0.4649997999999954 @@ -12698,7 +12698,7 @@ conf145 2.5457477248 0 83.4350002 0.4649997999999954 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf146 3.45809656583 0 83.355833325 0.5441666749999939 @@ -12785,7 +12785,7 @@ conf146 3.45809656583 0 83.355833325 0.5441666749999939 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf147 3.44635457898 0 83.539166225 0.3608337749999976 @@ -12872,7 +12872,7 @@ conf147 3.44635457898 0 83.539166225 0.3608337749999976 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf148 3.41753668267 0 83.4791668 0.42083319999999846 @@ -12959,7 +12959,7 @@ conf148 3.41753668267 0 83.4791668 0.42083319999999846 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf149 3.52798359041 0 83.385000475 0.5149995250000018 @@ -13046,7 +13046,7 @@ conf149 3.52798359041 0 83.385000475 0.5149995250000018 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf150 2.50318934638 0 82.903333275 0.8950000875000086 @@ -13133,7 +13133,7 @@ conf150 2.50318934638 0 82.903333275 0.8950000875000086 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf151 4.86977759551 0 82.747500575 1.1287491374999945 @@ -13220,7 +13220,7 @@ conf151 4.86977759551 0 82.747500575 1.1287491374999945 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf152 4.07556332058 0 83.18666665 0.4700000249999903 @@ -13307,7 +13307,7 @@ conf152 4.07556332058 0 83.18666665 0.4700000249999903 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf153 4.29215458 0 82.4750002 1.537499700000005 @@ -13394,7 +13394,7 @@ conf153 4.29215458 0 82.4750002 1.537499700000005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf154 3.40093211566 0 82.879168075 0.9312478875000068 @@ -13481,7 +13481,7 @@ conf154 3.40093211566 0 82.879168075 0.9312478875000068 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf155 3.86205271697 0 82.45416655 1.5687501750000052 @@ -13568,7 +13568,7 @@ conf155 3.86205271697 0 82.45416655 1.5687501750000052 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf156 5.10594235034 0 82.3866671 1.6699993500000048 @@ -13655,7 +13655,7 @@ conf156 5.10594235034 0 82.3866671 1.6699993500000048 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf157 3.97566939928 0 83.4041666 0.49583340000000364 @@ -13742,7 +13742,7 @@ conf157 3.97566939928 0 83.4041666 0.49583340000000364 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf158 4.26547578391 0 82.649166975 1.2762495375 @@ -13829,7 +13829,7 @@ conf158 4.26547578391 0 82.649166975 1.2762495375 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf159 2.86684824023 0 83.540834875 0.3591651249999984 @@ -13916,7 +13916,7 @@ conf159 2.86684824023 0 83.540834875 0.3591651249999984 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf160 4.30768574231 0 83.039166275 0.6912505875000008 @@ -14003,7 +14003,7 @@ conf160 4.30768574231 0 83.039166275 0.6912505875000008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf161 3.3955218479 0 82.866666975 0.9499995374999983 @@ -14090,7 +14090,7 @@ conf161 3.3955218479 0 82.866666975 0.9499995374999983 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf162 3.72248093427 0 82.32999895 1.7550015749999943 @@ -14177,7 +14177,7 @@ conf162 3.72248093427 0 82.32999895 1.7550015749999943 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf163 3.27298448736 0 83.130833775 0.5537493375000011 @@ -14264,7 +14264,7 @@ conf163 3.27298448736 0 83.130833775 0.5537493375000011 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf164 4.61114936571 0 82.752500675 1.1212489875000102 @@ -14351,7 +14351,7 @@ conf164 4.61114936571 0 82.752500675 1.1212489875000102 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf165 3.19899876823 0 83.178334275 0.48249858750000385 @@ -14438,7 +14438,7 @@ conf165 3.19899876823 0 83.178334275 0.48249858750000385 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf166 3.83456440952 0 82.976666825 0.7849997625000071 @@ -14525,7 +14525,7 @@ conf166 3.83456440952 0 82.976666825 0.7849997625000071 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf167 2.30836259044 0 82.75166665 1.1225000249999937 @@ -14612,7 +14612,7 @@ conf167 2.30836259044 0 82.75166665 1.1225000249999937 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf168 3.94938021018 0 82.404166825 1.6437497624999935 @@ -14699,7 +14699,7 @@ conf168 3.94938021018 0 82.404166825 1.6437497624999935 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf169 3.03405530665 0 82.310832375 1.783751437499994 @@ -14786,7 +14786,7 @@ conf169 3.03405530665 0 82.310832375 1.783751437499994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf170 3.19888206567 0 82.723333475 1.164999787499994 @@ -14873,7 +14873,7 @@ conf170 3.19888206567 0 82.723333475 1.164999787499994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf171 3.53270037124 0 83.491666425 0.4083335749999947 @@ -14960,7 +14960,7 @@ conf171 3.53270037124 0 83.491666425 0.4083335749999947 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf172 4.79216667175 0 82.71833295 1.1725005749999937 @@ -15047,7 +15047,7 @@ conf172 4.79216667175 0 82.71833295 1.1725005749999937 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf173 3.02752938072 0 83.403333025 0.4966669750000051 @@ -15134,7 +15134,7 @@ conf173 3.02752938072 0 83.403333025 0.4966669750000051 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf174 3.88594686803 0 82.637499775 1.2937503375000077 @@ -15221,7 +15221,7 @@ conf174 3.88594686803 0 82.637499775 1.2937503375000077 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf175 4.57102617045 0 82.94500005 0.8324999249999934 @@ -15308,7 +15308,7 @@ conf175 4.57102617045 0 82.94500005 0.8324999249999934 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf176 4.74071864084 0 82.605000725 1.3424989125000053 @@ -15395,7 +15395,7 @@ conf176 4.74071864084 0 82.605000725 1.3424989125000053 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf177 2.82188563029 0 82.535832825 1.4462507625 @@ -15481,8 +15481,8 @@ conf177 2.82188563029 0 82.535832825 1.4462507625 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf178 4.81069968397 0 82.399999575 1.650000637500007 @@ -15569,7 +15569,7 @@ conf178 4.81069968397 0 82.399999575 1.650000637500007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf179 3.38755218357 0 82.364999575 1.7025006375000018 @@ -15656,7 +15656,7 @@ conf179 3.38755218357 0 82.364999575 1.7025006375000018 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf180 2.91112760713 0 82.5008331 1.4987503500000088 @@ -15742,8 +15742,8 @@ conf180 2.91112760713 0 82.5008331 1.4987503500000088 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf181 3.94324960541 0 82.731666725 1.152499912500005 @@ -15830,7 +15830,7 @@ conf181 3.94324960541 0 82.731666725 1.152499912500005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf182 3.42161292708 0 82.574166675 1.388749987499999 @@ -15917,7 +15917,7 @@ conf182 3.42161292708 0 82.574166675 1.388749987499999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf183 4.65182047692 0 82.452499825 1.5712502624999942 @@ -16004,7 +16004,7 @@ conf183 4.65182047692 0 82.452499825 1.5712502624999942 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf184 5.04837421853 0 82.800833125 1.0487503125000046 @@ -16091,7 +16091,7 @@ conf184 5.04837421853 0 82.800833125 1.0487503125000046 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf185 2.45093941168 0 82.80000035 1.0499994749999928 @@ -16178,7 +16178,7 @@ conf185 2.45093941168 0 82.80000035 1.0499994749999928 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf186 3.01205697976 0 82.484999825 1.522500262499996 @@ -16265,7 +16265,7 @@ conf186 3.01205697976 0 82.484999825 1.522500262499996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf187 4.82136836543 0 82.584166725 1.3737499124999957 @@ -16352,7 +16352,7 @@ conf187 4.82136836543 0 82.584166725 1.3737499124999957 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf188 3.22231213248 0 83.0100006 0.7349991000000031 @@ -16438,8 +16438,8 @@ conf188 3.22231213248 0 83.0100006 0.7349991000000031 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf189 3.4166001205 0 82.42083335 1.6187499750000072 @@ -16526,7 +16526,7 @@ conf189 3.4166001205 0 82.42083335 1.6187499750000072 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf190 4.06133523158 0 83.4216662 0.4783337999999958 @@ -16613,7 +16613,7 @@ conf190 4.06133523158 0 83.4216662 0.4783337999999958 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf191 3.94770753292 0 83.3983332 0.5016668000000039 @@ -16700,7 +16700,7 @@ conf191 3.94770753292 0 83.3983332 0.5016668000000039 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf192 3.75552436203 0 82.336667125 1.744999312499992 @@ -16787,7 +16787,7 @@ conf192 3.75552436203 0 82.336667125 1.744999312499992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf193 2.68651427482 0 83.54833215 0.3516678500000069 @@ -16873,8 +16873,8 @@ conf193 2.68651427482 0 83.54833215 0.3516678500000069 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf194 2.55066958907 0 84.092500875 0.10749912500000108 @@ -16961,7 +16961,7 @@ conf194 2.55066958907 0 84.092500875 0.10749912500000108 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf195 2.50820466375 0 82.63416685 1.2987497250000004 @@ -17048,7 +17048,7 @@ conf195 2.50820466375 0 82.63416685 1.2987497250000004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf196 4.01724551849 0 82.9424989 0.8362516499999941 @@ -17135,7 +17135,7 @@ conf196 4.01724551849 0 82.9424989 0.8362516499999941 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf197 3.50091311571 0 82.86916615 0.9462507750000029 @@ -17222,7 +17222,7 @@ conf197 3.50091311571 0 82.86916615 0.9462507750000029 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf198 3.68915660084 0 83.0441668 0.6837498000000011 @@ -17309,7 +17309,7 @@ conf198 3.68915660084 0 83.0441668 0.6837498000000011 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf199 4.75447357817 0 82.417499075 1.6237513875000076 @@ -17396,7 +17396,7 @@ conf199 4.75447357817 0 82.417499075 1.6237513875000076 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf200 4.78514719455 0 82.4225 1.6162500000000009 @@ -17483,7 +17483,7 @@ conf200 4.78514719455 0 82.4225 1.6162500000000009 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf201 3.09367054966 0 82.993333625 0.759999562499992 @@ -17569,8 +17569,8 @@ conf201 3.09367054966 0 82.993333625 0.759999562499992 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf202 4.91316217818 0 82.430833675 1.6037494874999965 @@ -17657,7 +17657,7 @@ conf202 4.91316217818 0 82.430833675 1.6037494874999965 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf203 3.19157274681 0 83.12833325 0.5575001250000042 @@ -17744,7 +17744,7 @@ conf203 3.19157274681 0 83.12833325 0.5575001250000042 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf204 3.97564269328 0 82.5891671 1.366249350000004 @@ -17831,7 +17831,7 @@ conf204 3.97564269328 0 82.5891671 1.366249350000004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf205 2.68532132689 0 82.545833125 1.4312503124999978 @@ -17917,8 +17917,8 @@ conf205 2.68532132689 0 82.545833125 1.4312503124999978 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf206 3.71324465912 0 82.521666525 1.4675002124999992 @@ -18005,7 +18005,7 @@ conf206 3.71324465912 0 82.521666525 1.4675002124999992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf207 2.8958215127 0 83.48666705 0.4133329500000059 @@ -18092,7 +18092,7 @@ conf207 2.8958215127 0 83.48666705 0.4133329500000059 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf208 3.81180477555 0 82.85749945 0.9637508249999911 @@ -18179,7 +18179,7 @@ conf208 3.81180477555 0 82.85749945 0.9637508249999911 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf209 4.73605658836 0 82.9783334 0.7824999000000048 @@ -18266,7 +18266,7 @@ conf209 4.73605658836 0 82.9783334 0.7824999000000048 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf210 4.3966762275 0 82.9116663 0.8825005500000103 @@ -18353,7 +18353,7 @@ conf210 4.3966762275 0 82.9116663 0.8825005500000103 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf211 4.84027012906 0 82.946666375 0.8300004374999901 @@ -18440,7 +18440,7 @@ conf211 4.84027012906 0 82.946666375 0.8300004374999901 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf212 4.7810074981 0 82.6783341 1.232498849999999 @@ -18527,7 +18527,7 @@ conf212 4.7810074981 0 82.6783341 1.232498849999999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf213 3.94197981933 0 82.51499935 1.4775009750000052 @@ -18614,7 +18614,7 @@ conf213 3.94197981933 0 82.51499935 1.4775009750000052 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf214 3.09393818603 0 83.29083295 0.6091670500000049 @@ -18701,7 +18701,7 @@ conf214 3.09393818603 0 83.29083295 0.6091670500000049 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf215 4.47717663808 0 82.46999965 1.5450005249999919 @@ -18788,7 +18788,7 @@ conf215 4.47717663808 0 82.46999965 1.5450005249999919 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf216 4.00501911696 0 82.804166975 1.0437495374999983 @@ -18875,7 +18875,7 @@ conf216 4.00501911696 0 82.804166975 1.0437495374999983 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf217 3.57989449491 0 82.66333405 1.2549989249999953 @@ -18962,7 +18962,7 @@ conf217 3.57989449491 0 82.66333405 1.2549989249999953 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf218 4.11821228883 0 83.10583275 0.591250874999993 @@ -19049,7 +19049,7 @@ conf218 4.11821228883 0 83.10583275 0.591250874999993 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf219 3.88153307059 0 82.55249925 1.4212511250000048 @@ -19136,7 +19136,7 @@ conf219 3.88153307059 0 82.55249925 1.4212511250000048 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf220 5.07056073888 0 82.528332975 1.4575005375000032 @@ -19223,7 +19223,7 @@ conf220 5.07056073888 0 82.528332975 1.4575005375000032 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf221 3.8858448189 0 82.51499975 1.4775003749999982 @@ -19310,7 +19310,7 @@ conf221 3.8858448189 0 82.51499975 1.4775003749999982 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf222 4.75214121141 0 82.71583365 1.17624952500001 @@ -19397,7 +19397,7 @@ conf222 4.75214121141 0 82.71583365 1.17624952500001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf223 2.910370732 0 82.31000025 1.7849996249999975 @@ -19484,7 +19484,7 @@ conf223 2.910370732 0 82.31000025 1.7849996249999975 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf224 3.73423244747 0 83.423333575 0.4766664249999991 @@ -19571,7 +19571,7 @@ conf224 3.73423244747 0 83.423333575 0.4766664249999991 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf225 3.76639734311 0 82.6149999 1.3275001499999988 @@ -19658,7 +19658,7 @@ conf225 3.76639734311 0 82.6149999 1.3275001499999988 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf226 4.72441884283 0 82.556666625 1.4150000624999919 @@ -19745,7 +19745,7 @@ conf226 4.72441884283 0 82.556666625 1.4150000624999919 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf227 3.39553645715 0 82.51416665 1.4787500249999894 @@ -19832,7 +19832,7 @@ conf227 3.39553645715 0 82.51416665 1.4787500249999894 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf228 3.14711977882 0 83.490832725 0.40916727499999583 @@ -19919,7 +19919,7 @@ conf228 3.14711977882 0 83.490832725 0.40916727499999583 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf229 3.20446998859 0 83.181666575 0.47750013750000875 @@ -20006,7 +20006,7 @@ conf229 3.20446998859 0 83.181666575 0.47750013750000875 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf230 4.10892724077 0 82.45166665 1.5725000249999894 @@ -20093,7 +20093,7 @@ conf230 4.10892724077 0 82.45166665 1.5725000249999894 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf231 4.04143117823 0 83.1491669 0.526249650000004 @@ -20180,7 +20180,7 @@ conf231 4.04143117823 0 83.1491669 0.526249650000004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf232 3.68311329082 0 82.6408325 1.2887512499999971 @@ -20267,7 +20267,7 @@ conf232 3.68311329082 0 82.6408325 1.2887512499999971 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf233 3.61852655927 0 82.6024999 1.346250150000003 @@ -20354,7 +20354,7 @@ conf233 3.61852655927 0 82.6024999 1.346250150000003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf234 3.38095068609 0 83.371666525 0.5283334750000052 @@ -20441,7 +20441,7 @@ conf234 3.38095068609 0 83.371666525 0.5283334750000052 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf235 4.10272820524 0 82.470833275 1.543750087499994 @@ -20528,7 +20528,7 @@ conf235 4.10272820524 0 82.470833275 1.543750087499994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf236 2.12236980351 0 83.199167425 0.45124886250000174 @@ -20614,8 +20614,8 @@ conf236 2.12236980351 0 83.199167425 0.45124886250000174 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf237 4.84222431165 0 82.394166075 1.6587508874999983 @@ -20702,7 +20702,7 @@ conf237 4.84222431165 0 82.394166075 1.6587508874999983 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf238 3.83587841008 0 83.6333336 0.26666639999999975 @@ -20789,7 +20789,7 @@ conf238 3.83587841008 0 83.6333336 0.26666639999999975 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf239 3.67407110454 0 82.985834175 0.7712487375000094 @@ -20876,7 +20876,7 @@ conf239 3.67407110454 0 82.985834175 0.7712487375000094 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf240 3.84795081159 0 82.53000145 1.4549978249999995 @@ -20963,7 +20963,7 @@ conf240 3.84795081159 0 82.53000145 1.4549978249999995 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf241 4.03626436782 0 82.481666325 1.5275005124999907 @@ -21050,7 +21050,7 @@ conf241 4.03626436782 0 82.481666325 1.5275005124999907 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf242 2.73647327539 0 82.730000025 1.154999962500007 @@ -21137,7 +21137,7 @@ conf242 2.73647327539 0 82.730000025 1.154999962500007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf243 2.9402893763 0 82.428334075 1.6074988875000074 @@ -21224,7 +21224,7 @@ conf243 2.9402893763 0 82.428334075 1.6074988875000074 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf244 3.36443733772 0 82.35333375 1.7199993749999933 @@ -21311,7 +21311,7 @@ conf244 3.36443733772 0 82.35333375 1.7199993749999933 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf245 4.58817969835 0 82.5808333 1.3787500500000078 @@ -21398,7 +21398,7 @@ conf245 4.58817969835 0 82.5808333 1.3787500500000078 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf246 4.61508278268 0 82.421665975 1.6175010375000056 @@ -21485,7 +21485,7 @@ conf246 4.61508278268 0 82.421665975 1.6175010375000056 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf247 3.73823272415 0 83.870833 0.02916699999999539 @@ -21572,7 +21572,7 @@ conf247 3.73823272415 0 83.870833 0.02916699999999539 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf248 3.73623903096 0 82.507500625 1.4887490624999913 @@ -21659,7 +21659,7 @@ conf248 3.73623903096 0 82.507500625 1.4887490624999913 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf249 4.58723959299 0 82.507499875 1.4887501875000098 @@ -21746,7 +21746,7 @@ conf249 4.58723959299 0 82.507499875 1.4887501875000098 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf250 3.92381095552 0 82.485 1.5225000000000009 @@ -21833,7 +21833,7 @@ conf250 3.92381095552 0 82.485 1.5225000000000009 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf251 4.03164481442 0 82.8166667 1.0249999500000015 @@ -21920,7 +21920,7 @@ conf251 4.03164481442 0 82.8166667 1.0249999500000015 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf252 5.03810397071 0 82.616667175 1.3249992374999948 @@ -22007,7 +22007,7 @@ conf252 5.03810397071 0 82.616667175 1.3249992374999948 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf253 3.51418495543 0 82.565833 1.4012505000000033 @@ -22094,7 +22094,7 @@ conf253 3.51418495543 0 82.565833 1.4012505000000033 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf254 4.86464446972 0 82.545833325 1.4312500124999943 @@ -22181,7 +22181,7 @@ conf254 4.86464446972 0 82.545833325 1.4312500124999943 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf255 3.00643438726 0 82.625833175 1.3112502375000048 @@ -22268,7 +22268,7 @@ conf255 3.00643438726 0 82.625833175 1.3112502375000048 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf256 3.24223825052 0 82.541666625 1.4375000624999927 @@ -22354,8 +22354,8 @@ conf256 3.24223825052 0 82.541666625 1.4375000624999927 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf257 3.53797532059 0 82.464167075 1.553749387499991 @@ -22442,7 +22442,7 @@ conf257 3.53797532059 0 82.464167075 1.553749387499991 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf258 2.69597918008 0 82.724999675 1.1625004874999902 @@ -22529,7 +22529,7 @@ conf258 2.69597918008 0 82.724999675 1.1625004874999902 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf259 4.49884904687 0 82.9025 0.8962499999999949 @@ -22616,7 +22616,7 @@ conf259 4.49884904687 0 82.9025 0.8962499999999949 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf260 3.4837176103 0 82.498332625 1.502501062499995 @@ -22703,7 +22703,7 @@ conf260 3.4837176103 0 82.498332625 1.502501062499995 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf261 4.41401318128 0 82.903332325 0.8950015125000093 @@ -22790,7 +22790,7 @@ conf261 4.41401318128 0 82.903332325 0.8950015125000093 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf262 3.67052383798 0 82.335833775 1.7462493375000037 @@ -22877,7 +22877,7 @@ conf262 3.67052383798 0 82.335833775 1.7462493375000037 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf263 4.22448259906 0 82.9066673 0.8899990500000072 @@ -22964,7 +22964,7 @@ conf263 4.22448259906 0 82.9066673 0.8899990500000072 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf264 3.96752953642 0 82.571665825 1.392501262500005 @@ -23051,7 +23051,7 @@ conf264 3.96752953642 0 82.571665825 1.392501262500005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf265 4.60190720856 0 82.4908341 1.513748849999999 @@ -23138,7 +23138,7 @@ conf265 4.60190720856 0 82.4908341 1.513748849999999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf266 3.35576957096 0 82.3383337 1.7424994499999897 @@ -23225,7 +23225,7 @@ conf266 3.35576957096 0 82.3383337 1.7424994499999897 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf267 4.08918081035 0 82.930000725 0.854998912500001 @@ -23312,7 +23312,7 @@ conf267 4.08918081035 0 82.930000725 0.854998912500001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf268 3.57822046748 0 82.50833265 1.4875010250000003 @@ -23399,7 +23399,7 @@ conf268 3.57822046748 0 82.50833265 1.4875010250000003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf269 3.01461128645 0 82.699165925 1.201251112499996 @@ -23486,7 +23486,7 @@ conf269 3.01461128645 0 82.699165925 1.201251112499996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf270 3.61766786853 0 82.739166925 1.1412496124999905 @@ -23573,7 +23573,7 @@ conf270 3.61766786853 0 82.739166925 1.1412496124999905 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf271 3.750310087 0 82.210833125 1.9337503125000097 @@ -23660,7 +23660,7 @@ conf271 3.750310087 0 82.210833125 1.9337503125000097 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf272 3.02018362332 0 82.42083315 1.6187502749999894 @@ -23747,7 +23747,7 @@ conf272 3.02018362332 0 82.42083315 1.6187502749999894 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf273 4.36785471859 0 82.518332925 1.4725006125000064 @@ -23834,7 +23834,7 @@ conf273 4.36785471859 0 82.518332925 1.4725006125000064 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf274 4.81261636389 0 82.5000007 1.4999989499999984 @@ -23921,7 +23921,7 @@ conf274 4.81261636389 0 82.5000007 1.4999989499999984 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf275 5.03475050744 0 82.9916657 0.762501450000002 @@ -24008,7 +24008,7 @@ conf275 5.03475050744 0 82.9916657 0.762501450000002 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf276 3.20254279689 0 82.9950007 0.7574989499999916 @@ -24095,7 +24095,7 @@ conf276 3.20254279689 0 82.9950007 0.7574989499999916 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf277 3.34606399788 0 82.650833325 1.2737500125000096 @@ -24181,8 +24181,8 @@ conf277 3.34606399788 0 82.650833325 1.2737500125000096 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf278 3.49225813657 0 83.009166175 0.7362507374999936 @@ -24269,7 +24269,7 @@ conf278 3.49225813657 0 83.009166175 0.7362507374999936 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf279 3.92676884272 0 82.2883325 1.8175012500000065 @@ -24356,7 +24356,7 @@ conf279 3.92676884272 0 82.2883325 1.8175012500000065 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf280 4.70669944434 0 82.671667375 1.2424989375000024 @@ -24443,7 +24443,7 @@ conf280 4.70669944434 0 82.671667375 1.2424989375000024 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf281 3.89081058464 0 82.463332775 1.5550008375000033 @@ -24530,7 +24530,7 @@ conf281 3.89081058464 0 82.463332775 1.5550008375000033 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf282 4.66984667033 0 82.287499225 1.8187511624999928 @@ -24617,7 +24617,7 @@ conf282 4.66984667033 0 82.287499225 1.8187511624999928 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf283 4.54215357355 0 82.9841665 0.7737502499999991 @@ -24704,7 +24704,7 @@ conf283 4.54215357355 0 82.9841665 0.7737502499999991 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf284 3.03693571431 0 83.2099996 0.6900003999999967 @@ -24791,7 +24791,7 @@ conf284 3.03693571431 0 83.2099996 0.6900003999999967 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf285 3.63005230824 0 83.482500425 0.41749957500000223 @@ -24878,7 +24878,7 @@ conf285 3.63005230824 0 83.482500425 0.41749957500000223 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf286 3.74518195556 0 82.328333925 1.7574991125000068 @@ -24965,7 +24965,7 @@ conf286 3.74518195556 0 82.328333925 1.7574991125000068 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf287 4.28385957873 0 82.8808338 0.9287492999999927 @@ -25052,7 +25052,7 @@ conf287 4.28385957873 0 82.8808338 0.9287492999999927 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf288 3.63008570243 0 82.825832825 1.0112507624999907 @@ -25139,7 +25139,7 @@ conf288 3.63008570243 0 82.825832825 1.0112507624999907 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf289 4.84013060098 0 82.28916705 1.8162494249999952 @@ -25226,7 +25226,7 @@ conf289 4.84013060098 0 82.28916705 1.8162494249999952 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf290 3.48681911925 0 82.852500525 0.9712492125000054 @@ -25313,7 +25313,7 @@ conf290 3.48681911925 0 82.852500525 0.9712492125000054 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf291 3.21786984244 0 82.9083336 0.8874995999999911 @@ -25400,7 +25400,7 @@ conf291 3.21786984244 0 82.9083336 0.8874995999999911 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf292 3.55535850382 0 82.404167025 1.64374946249999 @@ -25487,7 +25487,7 @@ conf292 3.55535850382 0 82.404167025 1.64374946249999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf293 3.89768525788 0 82.523333625 1.4649995624999903 @@ -25574,7 +25574,7 @@ conf293 3.89768525788 0 82.523333625 1.4649995624999903 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf294 4.75169294002 0 82.664166875 1.2537496874999903 @@ -25661,7 +25661,7 @@ conf294 4.75169294002 0 82.664166875 1.2537496874999903 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf295 3.14371669936 0 83.1099993 0.5850010500000025 @@ -25748,7 +25748,7 @@ conf295 3.14371669936 0 83.1099993 0.5850010500000025 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf296 5.10361442824 0 82.5608335 1.4087497499999984 @@ -25835,7 +25835,7 @@ conf296 5.10361442824 0 82.5608335 1.4087497499999984 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf297 4.18357333726 0 83.11333375 0.579999375000007 @@ -25922,7 +25922,7 @@ conf297 4.18357333726 0 83.11333375 0.579999375000007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf298 3.42706009322 0 82.52250045 1.4662493250000068 @@ -26009,7 +26009,7 @@ conf298 3.42706009322 0 82.52250045 1.4662493250000068 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf299 2.98067415779 0 82.4658333 1.5512500500000002 @@ -26096,7 +26096,7 @@ conf299 2.98067415779 0 82.4658333 1.5512500500000002 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf300 3.88594686803 0 83.4341662 0.46583379999999297 @@ -26183,7 +26183,7 @@ conf300 3.88594686803 0 83.4341662 0.46583379999999297 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf301 4.70210402664 0 82.59833435 1.3524984749999973 @@ -26270,7 +26270,7 @@ conf301 4.70210402664 0 82.59833435 1.3524984749999973 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf302 3.64780127709 0 82.2675003 1.848749550000008 @@ -26357,7 +26357,7 @@ conf302 3.64780127709 0 82.2675003 1.848749550000008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf303 2.96739976932 0 82.544167375 1.433748937499999 @@ -26444,7 +26444,7 @@ conf303 2.96739976932 0 82.544167375 1.433748937499999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf304 4.68653018118 0 82.502499725 1.4962504124999896 @@ -26531,7 +26531,7 @@ conf304 4.68653018118 0 82.502499725 1.4962504124999896 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf305 2.72775066233 0 82.549167275 1.426249087499997 @@ -26617,8 +26617,8 @@ conf305 2.72775066233 0 82.549167275 1.426249087499997 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf306 4.81025398186 0 82.78249895 1.0762515749999935 @@ -26705,7 +26705,7 @@ conf306 4.81025398186 0 82.78249895 1.0762515749999935 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf307 4.01784187804 0 82.360834725 1.7087479124999945 @@ -26792,7 +26792,7 @@ conf307 4.01784187804 0 82.360834725 1.7087479124999945 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf308 3.9818651409 0 82.3950007 1.6574989500000044 @@ -26879,7 +26879,7 @@ conf308 3.9818651409 0 82.3950007 1.6574989500000044 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf309 4.39646884483 0 82.831667425 1.0024988624999907 @@ -26966,7 +26966,7 @@ conf309 4.39646884483 0 82.831667425 1.0024988624999907 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf310 4.12538749451 0 82.47000065 1.5449990249999956 @@ -27053,7 +27053,7 @@ conf310 4.12538749451 0 82.47000065 1.5449990249999956 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf311 4.02625578652 0 83.010832875 0.7337506874999917 @@ -27140,7 +27140,7 @@ conf311 4.02625578652 0 83.010832875 0.7337506874999917 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf312 3.23314207049 0 83.0258333 0.7112500499999967 @@ -27227,7 +27227,7 @@ conf312 3.23314207049 0 83.0258333 0.7112500499999967 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf313 5.07056073888 0 82.65583335 1.266249975000008 @@ -27314,7 +27314,7 @@ conf313 5.07056073888 0 82.65583335 1.266249975000008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf314 3.66210824146 0 82.6758336 1.2362495999999936 @@ -27401,7 +27401,7 @@ conf314 3.66210824146 0 82.6758336 1.2362495999999936 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf315 3.98603853424 0 83.55333365 0.346666350000001 @@ -27488,7 +27488,7 @@ conf315 3.98603853424 0 83.55333365 0.346666350000001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf316 4.05928520007 0 82.42750015 1.6087497749999997 @@ -27575,7 +27575,7 @@ conf316 4.05928520007 0 82.42750015 1.6087497749999997 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf317 3.95156604504 0 82.849167925 0.9762481124999951 @@ -27662,7 +27662,7 @@ conf317 3.95156604504 0 82.849167925 0.9762481124999951 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf318 5.07056073888 0 82.7108334 1.1837499000000022 @@ -27749,7 +27749,7 @@ conf318 5.07056073888 0 82.7108334 1.1837499000000022 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf319 4.54367340327 0 82.7283329 1.157500650000003 @@ -27836,7 +27836,7 @@ conf319 4.54367340327 0 82.7283329 1.157500650000003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf320 2.81005243994 0 82.39750025 1.6537496250000103 @@ -27923,7 +27923,7 @@ conf320 2.81005243994 0 82.39750025 1.6537496250000103 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf321 4.51492338048 0 82.850833875 0.9737491874999904 @@ -28010,7 +28010,7 @@ conf321 4.51492338048 0 82.850833875 0.9737491874999904 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf322 2.96690147068 0 82.239998925 1.8900016125000079 @@ -28097,7 +28097,7 @@ conf322 2.96690147068 0 82.239998925 1.8900016125000079 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf323 4.71954979727 0 82.48083315 1.5287502750000073 @@ -28184,7 +28184,7 @@ conf323 4.71954979727 0 82.48083315 1.5287502750000073 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf324 3.2434055334 0 83.0133343 0.7299985500000048 @@ -28271,7 +28271,7 @@ conf324 3.2434055334 0 83.0133343 0.7299985500000048 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf325 5.0039862469 0 82.628333275 1.3075000874999958 @@ -28358,7 +28358,7 @@ conf325 5.0039862469 0 82.628333275 1.3075000874999958 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf326 4.27188540398 0 82.711667225 1.182499162500001 @@ -28445,7 +28445,7 @@ conf326 4.27188540398 0 82.711667225 1.182499162500001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf327 4.82968914784 0 82.676666425 1.2350003625000099 @@ -28532,7 +28532,7 @@ conf327 4.82968914784 0 82.676666425 1.2350003625000099 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf328 4.84170071418 0 82.393334175 1.6599987374999898 @@ -28619,7 +28619,7 @@ conf328 4.84170071418 0 82.393334175 1.6599987374999898 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf329 2.76448700927 0 83.379999475 0.5200005249999947 @@ -28705,8 +28705,8 @@ conf329 2.76448700927 0 83.379999475 0.5200005249999947 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf330 4.50819005563 0 83.037499775 0.6937503374999991 @@ -28793,7 +28793,7 @@ conf330 4.50819005563 0 83.037499775 0.6937503374999991 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf331 4.78305036024 0 82.9366671 0.844999350000009 @@ -28880,7 +28880,7 @@ conf331 4.78305036024 0 82.9366671 0.844999350000009 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf332 4.14981813757 0 82.44249955 1.5862506750000094 @@ -28967,7 +28967,7 @@ conf332 4.14981813757 0 82.44249955 1.5862506750000094 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf333 4.75339681711 0 82.72 1.1700000000000017 @@ -29054,7 +29054,7 @@ conf333 4.75339681711 0 82.72 1.1700000000000017 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf334 4.47948584577 0 82.89833325 0.9025001250000102 @@ -29141,7 +29141,7 @@ conf334 4.47948584577 0 82.89833325 0.9025001250000102 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf335 5.02679188425 0 82.762499675 1.1062504874999988 @@ -29228,7 +29228,7 @@ conf335 5.02679188425 0 82.762499675 1.1062504874999988 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf336 3.88797427887 0 82.339999725 1.7400004125000024 @@ -29315,7 +29315,7 @@ conf336 3.88797427887 0 82.339999725 1.7400004125000024 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf337 3.83493211252 0 82.230832425 1.903751362499996 @@ -29402,7 +29402,7 @@ conf337 3.83493211252 0 82.230832425 1.903751362499996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf338 4.01004506039 0 82.446666075 1.580000887500006 @@ -29489,7 +29489,7 @@ conf338 4.01004506039 0 82.446666075 1.580000887500006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf339 4.42247279367 0 82.902500325 0.8962495124999919 @@ -29576,7 +29576,7 @@ conf339 4.42247279367 0 82.902500325 0.8962495124999919 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf340 2.50346409553 0 83.410000225 0.4899997749999955 @@ -29663,7 +29663,7 @@ conf340 2.50346409553 0 83.410000225 0.4899997749999955 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf341 4.18111303856 0 82.851667625 0.9724985624999931 @@ -29750,7 +29750,7 @@ conf341 4.18111303856 0 82.851667625 0.9724985624999931 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf342 3.96775128534 0 82.20666665 1.9400000249999962 @@ -29837,7 +29837,7 @@ conf342 3.96775128534 0 82.20666665 1.9400000249999962 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf343 2.97786909752 0 82.57250025 1.3912496249999933 @@ -29924,7 +29924,7 @@ conf343 2.97786909752 0 82.57250025 1.3912496249999933 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf344 3.76332606138 0 83.407499525 0.4925004749999943 @@ -30011,7 +30011,7 @@ conf344 3.76332606138 0 83.407499525 0.4925004749999943 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf345 2.51828940402 0 82.38833435 1.6674984750000093 @@ -30097,8 +30097,8 @@ conf345 2.51828940402 0 82.38833435 1.6674984750000093 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf346 4.63996409362 0 82.534999625 1.4475005625000037 @@ -30185,7 +30185,7 @@ conf346 4.63996409362 0 82.534999625 1.4475005625000037 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf347 2.8834064399 0 83.00083255 0.7487511750000024 @@ -30272,7 +30272,7 @@ conf347 2.8834064399 0 83.00083255 0.7487511750000024 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf348 4.01719695096 0 82.42916675 1.6062498750000103 @@ -30359,7 +30359,7 @@ conf348 4.01719695096 0 82.42916675 1.6062498750000103 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf349 3.79016967748 0 83.308332775 0.5916672250000033 @@ -30446,7 +30446,7 @@ conf349 3.79016967748 0 83.308332775 0.5916672250000033 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf350 4.02017107049 0 83.677499875 0.22250012500000482 @@ -30533,7 +30533,7 @@ conf350 4.02017107049 0 83.677499875 0.22250012500000482 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf351 3.41060528657 0 83.453334125 0.4466658750000022 @@ -30620,7 +30620,7 @@ conf351 3.41060528657 0 83.453334125 0.4466658750000022 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf352 4.34097923838 0 82.840833275 0.9887500875000086 @@ -30707,7 +30707,7 @@ conf352 4.34097923838 0 82.840833275 0.9887500875000086 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf353 4.04794102279 0 82.297500475 1.8037492874999899 @@ -30794,7 +30794,7 @@ conf353 4.04794102279 0 82.297500475 1.8037492874999899 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf354 3.82452023243 0 82.588333375 1.3674999374999928 @@ -30881,7 +30881,7 @@ conf354 3.82452023243 0 82.588333375 1.3674999374999928 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf355 4.80023382881 0 82.67500055 1.2374991750000106 @@ -30968,7 +30968,7 @@ conf355 4.80023382881 0 82.67500055 1.2374991750000106 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf356 3.56691836002 0 82.5175005 1.4737492500000045 @@ -31055,7 +31055,7 @@ conf356 3.56691836002 0 82.5175005 1.4737492500000045 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf357 4.45618504206 0 82.7649994 1.1025009000000097 @@ -31142,7 +31142,7 @@ conf357 4.45618504206 0 82.7649994 1.1025009000000097 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf358 3.18726409248 0 82.489167225 1.5162491624999959 @@ -31228,8 +31228,8 @@ conf358 3.18726409248 0 82.489167225 1.5162491624999959 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf359 4.75198860048 0 82.688332775 1.2175008374999905 @@ -31316,7 +31316,7 @@ conf359 4.75198860048 0 82.688332775 1.2175008374999905 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf360 4.2501458709 0 82.871666375 0.9425004374999943 @@ -31403,7 +31403,7 @@ conf360 4.2501458709 0 82.871666375 0.9425004374999943 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf361 3.30430376399 0 82.444167175 1.583749237499994 @@ -31490,7 +31490,7 @@ conf361 3.30430376399 0 82.444167175 1.583749237499994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf362 5.03475050744 0 82.8366664 0.9950004000000021 @@ -31577,7 +31577,7 @@ conf362 5.03475050744 0 82.8366664 0.9950004000000021 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf363 4.28263038096 0 82.936667475 0.8449987875000105 @@ -31664,7 +31664,7 @@ conf363 4.28263038096 0 82.936667475 0.8449987875000105 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf364 3.8799230824 0 82.63083325 1.3037501250000076 @@ -31751,7 +31751,7 @@ conf364 3.8799230824 0 82.63083325 1.3037501250000076 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf365 4.04669215846 0 82.418334 1.6224989999999977 @@ -31838,7 +31838,7 @@ conf365 4.04669215846 0 82.418334 1.6224989999999977 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf366 3.40610310246 0 83.033332825 0.7000007624999967 @@ -31925,7 +31925,7 @@ conf366 3.40610310246 0 83.033332825 0.7000007624999967 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf367 3.51431327797 0 82.6341656 1.2987515999999957 @@ -32012,7 +32012,7 @@ conf367 3.51431327797 0 82.6341656 1.2987515999999957 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf368 2.89250858373 0 82.598333575 1.352499637500003 @@ -32099,7 +32099,7 @@ conf368 2.89250858373 0 82.598333575 1.352499637500003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf369 4.76367263331 0 82.748333625 1.1274995624999988 @@ -32186,7 +32186,7 @@ conf369 4.76367263331 0 82.748333625 1.1274995624999988 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf370 3.06773390399 0 83.06166635 0.6575004750000062 @@ -32273,7 +32273,7 @@ conf370 3.06773390399 0 83.06166635 0.6575004750000062 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf371 3.81866491401 0 82.473332975 1.540000537499992 @@ -32360,7 +32360,7 @@ conf371 3.81866491401 0 82.473332975 1.540000537499992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf372 4.07067564171 0 82.7366661 1.1450008500000095 @@ -32447,7 +32447,7 @@ conf372 4.07067564171 0 82.7366661 1.1450008500000095 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf373 4.69984886848 0 82.94000135 0.8399979749999957 @@ -32534,7 +32534,7 @@ conf373 4.69984886848 0 82.94000135 0.8399979749999957 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf374 3.95159407514 0 82.488333225 1.5175001624999922 @@ -32621,7 +32621,7 @@ conf374 3.95159407514 0 82.488333225 1.5175001624999922 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf375 3.13937863368 0 82.420833025 1.6187504625000102 @@ -32708,7 +32708,7 @@ conf375 3.13937863368 0 82.420833025 1.6187504625000102 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf376 4.06886719765 0 82.207500225 1.938749662499994 @@ -32795,7 +32795,7 @@ conf376 4.06886719765 0 82.207500225 1.938749662499994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf377 4.82473944779 0 82.758333325 1.112500012500007 @@ -32882,7 +32882,7 @@ conf377 4.82473944779 0 82.758333325 1.112500012500007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf378 4.10237168712 0 82.728333625 1.1574995624999929 @@ -32969,7 +32969,7 @@ conf378 4.10237168712 0 82.728333625 1.1574995624999929 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf379 4.28230271054 0 82.60416715 1.3437492750000075 @@ -33056,7 +33056,7 @@ conf379 4.28230271054 0 82.60416715 1.3437492750000075 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf380 4.07151149905 0 82.94416655 0.8337501749999916 @@ -33143,7 +33143,7 @@ conf380 4.07151149905 0 82.94416655 0.8337501749999916 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf381 3.75522652317 0 82.35083345 1.7237498250000058 @@ -33230,7 +33230,7 @@ conf381 3.75522652317 0 82.35083345 1.7237498250000058 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf382 3.21300150437 0 82.84916745 0.9762488250000061 @@ -33317,7 +33317,7 @@ conf382 3.21300150437 0 82.84916745 0.9762488250000061 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf383 3.79885050095 0 83.6158346 0.28416540000000057 @@ -33404,7 +33404,7 @@ conf383 3.79885050095 0 83.6158346 0.28416540000000057 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf384 3.88029635989 0 83.34500025 0.5549997500000018 @@ -33491,7 +33491,7 @@ conf384 3.88029635989 0 83.34500025 0.5549997500000018 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf385 4.81261636389 0 82.525833475 1.4612497875000017 @@ -33578,7 +33578,7 @@ conf385 4.81261636389 0 82.525833475 1.4612497875000017 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf386 4.06610010441 0 82.432499525 1.6012507125000042 @@ -33665,7 +33665,7 @@ conf386 4.06610010441 0 82.432499525 1.6012507125000042 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf387 3.64345739789 0 82.83833285 0.9925007249999993 @@ -33752,7 +33752,7 @@ conf387 3.64345739789 0 82.83833285 0.9925007249999993 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf388 3.1450236118 0 82.289999575 1.815000637500006 @@ -33839,7 +33839,7 @@ conf388 3.1450236118 0 82.289999575 1.815000637500006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf389 5.0682649604 0 82.61416665 1.328750024999998 @@ -33926,7 +33926,7 @@ conf389 5.0682649604 0 82.61416665 1.328750024999998 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf390 3.37475786052 0 82.70166765 1.1974985249999932 @@ -34013,7 +34013,7 @@ conf390 3.37475786052 0 82.70166765 1.1974985249999932 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf391 4.8051791887 0 82.43666675 1.5949998749999992 @@ -34100,7 +34100,7 @@ conf391 4.8051791887 0 82.43666675 1.5949998749999992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf392 3.58728287228 0 82.2716663 1.8425005499999898 @@ -34187,7 +34187,7 @@ conf392 3.58728287228 0 82.2716663 1.8425005499999898 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf393 4.14306014578 0 82.504999575 1.492500637500001 @@ -34274,7 +34274,7 @@ conf393 4.14306014578 0 82.504999575 1.492500637500001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf394 4.03164481442 0 83.4266668 0.47333319999999335 @@ -34361,7 +34361,7 @@ conf394 4.03164481442 0 83.4266668 0.47333319999999335 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf395 2.42882382804 0 82.4924999 1.5112501500000022 @@ -34447,8 +34447,8 @@ conf395 2.42882382804 0 82.4924999 1.5112501500000022 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf396 3.24966131713 0 82.5441659 1.4337511500000062 @@ -34535,7 +34535,7 @@ conf396 3.24966131713 0 82.5441659 1.4337511500000062 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf397 5.03810397071 0 82.669166575 1.2462501374999917 @@ -34622,7 +34622,7 @@ conf397 5.03810397071 0 82.669166575 1.2462501374999917 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf398 3.8539741489 0 83.56000025 0.3399997499999984 @@ -34709,7 +34709,7 @@ conf398 3.8539741489 0 83.56000025 0.3399997499999984 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf399 4.62067083775 0 82.644999375 1.2825009375000036 @@ -34796,7 +34796,7 @@ conf399 4.62067083775 0 82.644999375 1.2825009375000036 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf400 3.03642820088 0 82.575833875 1.386249187499999 @@ -34883,7 +34883,7 @@ conf400 3.03642820088 0 82.575833875 1.386249187499999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf401 3.75815991012 0 83.382499675 0.5175003249999947 @@ -34970,7 +34970,7 @@ conf401 3.75815991012 0 83.382499675 0.5175003249999947 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf402 3.13913054714 0 83.516667325 0.3833326749999998 @@ -35057,7 +35057,7 @@ conf402 3.13913054714 0 83.516667325 0.3833326749999998 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf403 3.68981492661 0 82.57916795 1.381248075000002 @@ -35144,7 +35144,7 @@ conf403 3.68981492661 0 82.57916795 1.381248075000002 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf404 4.17814274296 0 82.380833475 1.6787497874999957 @@ -35231,7 +35231,7 @@ conf404 4.17814274296 0 82.380833475 1.6787497874999957 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf405 4.65879016395 0 82.4683344 1.547498399999995 @@ -35318,7 +35318,7 @@ conf405 4.65879016395 0 82.4683344 1.547498399999995 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf406 3.33065244056 0 82.8774995 0.9337507500000015 @@ -35404,8 +35404,8 @@ conf406 3.33065244056 0 82.8774995 0.9337507500000015 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf407 4.65818702143 0 82.604165725 1.3437514124999979 @@ -35492,7 +35492,7 @@ conf407 4.65818702143 0 82.604165725 1.3437514124999979 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf408 3.71620846046 0 82.7358322 1.1462516999999934 @@ -35579,7 +35579,7 @@ conf408 3.71620846046 0 82.7358322 1.1462516999999934 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf409 4.7926796099 0 82.67833385 1.232499224999998 @@ -35666,7 +35666,7 @@ conf409 4.7926796099 0 82.67833385 1.232499224999998 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf410 4.67255813979 0 82.761666875 1.1074996874999954 @@ -35753,7 +35753,7 @@ conf410 4.67255813979 0 82.761666875 1.1074996874999954 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf411 3.52727610824 0 82.410833 1.633750500000005 @@ -35840,7 +35840,7 @@ conf411 3.52727610824 0 82.410833 1.633750500000005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf412 4.0559263475 0 82.689167075 1.2162493874999996 @@ -35927,7 +35927,7 @@ conf412 4.0559263475 0 82.689167075 1.2162493874999996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf413 4.76472720822 0 82.910834025 0.8837489625000003 @@ -36014,7 +36014,7 @@ conf413 4.76472720822 0 82.910834025 0.8837489625000003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf414 5.04582745845 0 81.728333675 2.6574994874999973 @@ -36101,7 +36101,7 @@ conf414 5.04582745845 0 81.728333675 2.6574994874999973 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf415 4.16438288715 0 81.889166025 2.4162509625000084 @@ -36188,7 +36188,7 @@ conf415 4.16438288715 0 81.889166025 2.4162509625000084 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf416 3.19361943032 0 82.478333525 1.5324997125000053 @@ -36275,7 +36275,7 @@ conf416 3.19361943032 0 82.478333525 1.5324997125000053 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf417 4.67998328738 0 82.31416605 1.7787509250000042 @@ -36362,7 +36362,7 @@ conf417 4.67998328738 0 82.31416605 1.7787509250000042 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf418 4.24727900136 0 82.594167725 1.3587484124999918 @@ -36449,7 +36449,7 @@ conf418 4.24727900136 0 82.594167725 1.3587484124999918 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf419 4.21370455598 0 81.9183331 2.372500350000003 @@ -36536,7 +36536,7 @@ conf419 4.21370455598 0 81.9183331 2.372500350000003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf420 4.01568429118 0 82.130832975 2.053750537499994 @@ -36623,7 +36623,7 @@ conf420 4.01568429118 0 82.130832975 2.053750537499994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf421 4.44627844648 0 82.171667125 1.9924993125000015 @@ -36710,7 +36710,7 @@ conf421 4.44627844648 0 82.171667125 1.9924993125000015 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf422 4.86785715154 0 81.767500325 2.5987495124999995 @@ -36797,7 +36797,7 @@ conf422 4.86785715154 0 81.767500325 2.5987495124999995 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf423 3.78354627524 0 81.96749995 2.298750074999994 @@ -36884,7 +36884,7 @@ conf423 3.78354627524 0 81.96749995 2.298750074999994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf424 4.82355617515 0 81.737499775 2.643750337499995 @@ -36971,7 +36971,7 @@ conf424 4.82355617515 0 81.737499775 2.643750337499995 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf425 4.68130980381 0 81.91333375 2.37999937499999 @@ -37058,7 +37058,7 @@ conf425 4.68130980381 0 81.91333375 2.37999937499999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf426 3.76671253278 0 82.096666125 2.105000812500002 @@ -37145,7 +37145,7 @@ conf426 3.76671253278 0 82.096666125 2.105000812500002 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf427 3.46252818741 0 82.00500065 2.2424990250000008 @@ -37232,7 +37232,7 @@ conf427 3.46252818741 0 82.00500065 2.2424990250000008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf428 4.20184196001 0 81.957499725 2.313750412499992 @@ -37319,7 +37319,7 @@ conf428 4.20184196001 0 81.957499725 2.313750412499992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf429 4.65131508141 0 81.892499775 2.411250337499993 @@ -37406,7 +37406,7 @@ conf429 4.65131508141 0 81.892499775 2.411250337499993 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf430 3.98383769983 0 83.190833875 0.4637491875000066 @@ -37493,7 +37493,7 @@ conf430 3.98383769983 0 83.190833875 0.4637491875000066 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf431 3.27760849251 0 82.3249994 1.7625009000000063 @@ -37580,7 +37580,7 @@ conf431 3.27760849251 0 82.3249994 1.7625009000000063 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf432 4.38304315647 0 82.2058333 1.9412500500000078 @@ -37667,7 +37667,7 @@ conf432 4.38304315647 0 82.2058333 1.9412500500000078 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf433 4.77155293977 0 82.4950007 1.5074989499999916 @@ -37754,7 +37754,7 @@ conf433 4.77155293977 0 82.4950007 1.5074989499999916 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf434 4.01383283961 0 82.924166225 0.86375066250001 @@ -37841,7 +37841,7 @@ conf434 4.01383283961 0 82.924166225 0.86375066250001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf435 3.29633871286 0 82.1750005 1.9874992500000062 @@ -37927,8 +37927,8 @@ conf435 3.29633871286 0 82.1750005 1.9874992500000062 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf436 4.00475896814 0 81.819166525 2.5212502125 @@ -38015,7 +38015,7 @@ conf436 4.00475896814 0 81.819166525 2.5212502125 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf437 4.68130980381 0 81.85750025 2.4637496249999984 @@ -38102,7 +38102,7 @@ conf437 4.68130980381 0 81.85750025 2.4637496249999984 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf438 4.91389970221 0 81.7924993 2.5612510499999956 @@ -38189,7 +38189,7 @@ conf438 4.91389970221 0 81.7924993 2.5612510499999956 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf439 4.35225506168 0 82.6324999 1.3012501500000013 @@ -38276,7 +38276,7 @@ conf439 4.35225506168 0 82.6324999 1.3012501500000013 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf440 3.38154272134 0 82.06583385 2.1512492249999937 @@ -38363,7 +38363,7 @@ conf440 3.38154272134 0 82.06583385 2.1512492249999937 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf441 2.98343299799 0 82.805833175 1.0412502374999946 @@ -38450,7 +38450,7 @@ conf441 2.98343299799 0 82.805833175 1.0412502374999946 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf442 2.77161119603 0 82.533333825 1.4499992625000004 @@ -38537,7 +38537,7 @@ conf442 2.77161119603 0 82.533333825 1.4499992625000004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf443 4.82931872007 0 82.49666595 1.5050010750000098 @@ -38624,7 +38624,7 @@ conf443 4.82931872007 0 82.49666595 1.5050010750000098 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf444 3.04357937636 0 83.1175005 0.5737492499999917 @@ -38711,7 +38711,7 @@ conf444 3.04357937636 0 83.1175005 0.5737492499999917 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf445 3.08469075437 0 81.883334425 2.4249983624999984 @@ -38798,7 +38798,7 @@ conf445 3.08469075437 0 81.883334425 2.4249983624999984 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf446 2.96822046232 0 82.82583375 1.0112493749999985 @@ -38885,7 +38885,7 @@ conf446 2.96822046232 0 82.82583375 1.0112493749999985 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf447 3.20549043766 0 82.180000925 1.9799986124999975 @@ -38972,7 +38972,7 @@ conf447 3.20549043766 0 82.180000925 1.9799986124999975 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf448 4.75987523967 0 82.61166655 1.332500175000007 @@ -39059,7 +39059,7 @@ conf448 4.75987523967 0 82.61166655 1.332500175000007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf449 2.91945372896 0 82.38833245 1.6675013250000106 @@ -39146,7 +39146,7 @@ conf449 2.91945372896 0 82.38833245 1.6675013250000106 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf450 4.51419508103 0 81.94833225 2.3275016249999894 @@ -39233,7 +39233,7 @@ conf450 4.51419508103 0 81.94833225 2.3275016249999894 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf451 3.52294385207 0 81.880000125 2.4299998125000073 @@ -39320,7 +39320,7 @@ conf451 3.52294385207 0 81.880000125 2.4299998125000073 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf452 4.4917740488 0 81.90999865 2.3850020249999915 @@ -39407,7 +39407,7 @@ conf452 4.4917740488 0 81.90999865 2.3850020249999915 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf453 4.34306160839 0 81.8333336 2.4999995999999953 @@ -39494,7 +39494,7 @@ conf453 4.34306160839 0 81.8333336 2.4999995999999953 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf454 4.46400957815 0 82.566666825 1.399999762500002 @@ -39581,7 +39581,7 @@ conf454 4.46400957815 0 82.566666825 1.399999762500002 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf455 4.23442583914 0 81.809166325 2.53625051249999 @@ -39668,7 +39668,7 @@ conf455 4.23442583914 0 81.809166325 2.53625051249999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf456 3.09884756006 0 83.5633333 0.33666670000000354 @@ -39755,7 +39755,7 @@ conf456 3.09884756006 0 83.5633333 0.33666670000000354 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf457 4.17572822161 0 81.7916664 2.5625004000000047 @@ -39842,7 +39842,7 @@ conf457 4.17572822161 0 81.7916664 2.5625004000000047 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf458 4.73697574556 0 81.834165775 2.4987513374999963 @@ -39929,7 +39929,7 @@ conf458 4.73697574556 0 81.834165775 2.4987513374999963 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf459 3.58283079043 0 82.9908333 0.7637500499999916 @@ -40016,7 +40016,7 @@ conf459 3.58283079043 0 82.9908333 0.7637500499999916 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf460 4.82149011283 0 82.056665975 2.165001037499998 @@ -40103,7 +40103,7 @@ conf460 4.82149011283 0 82.056665975 2.165001037499998 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf461 4.0330664574 0 81.9033327 2.3950009499999894 @@ -40190,7 +40190,7 @@ conf461 4.0330664574 0 81.9033327 2.3950009499999894 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf462 3.52895907761 0 81.877499525 2.433750712499993 @@ -40277,7 +40277,7 @@ conf462 3.52895907761 0 81.877499525 2.433750712499993 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf463 4.98588309159 0 82.21166705 1.932499424999996 @@ -40364,7 +40364,7 @@ conf463 4.98588309159 0 82.21166705 1.932499424999996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf464 4.37753149918 0 81.93499965 2.347500525000008 @@ -40451,7 +40451,7 @@ conf464 4.37753149918 0 81.93499965 2.347500525000008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf465 4.75414323671 0 81.829166525 2.5062502124999924 @@ -40538,7 +40538,7 @@ conf465 4.75414323671 0 81.829166525 2.5062502124999924 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf466 4.54502802382 0 82.263334875 1.8549976875000027 @@ -40625,7 +40625,7 @@ conf466 4.54502802382 0 82.263334875 1.8549976875000027 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf467 2.88319689339 0 82.1225006 2.0662491000000074 @@ -40712,7 +40712,7 @@ conf467 2.88319689339 0 82.1225006 2.0662491000000074 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf468 5.07280967677 0 81.8925001 2.41124984999999 @@ -40799,7 +40799,7 @@ conf468 5.07280967677 0 81.8925001 2.41124984999999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf469 4.95882658134 0 82.0083344 2.237498400000007 @@ -40886,7 +40886,7 @@ conf469 4.95882658134 0 82.0083344 2.237498400000007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf470 4.65232049065 0 82.353333675 1.7199994874999973 @@ -40973,7 +40973,7 @@ conf470 4.65232049065 0 82.353333675 1.7199994874999973 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf471 4.76059067514 0 81.89166715 2.4124992749999947 @@ -41060,7 +41060,7 @@ conf471 4.76059067514 0 81.89166715 2.4124992749999947 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf472 4.40265700171 0 82.3550001 1.717499850000003 @@ -41147,7 +41147,7 @@ conf472 4.40265700171 0 82.3550001 1.717499850000003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf473 4.76945454642 0 81.803334075 2.5449988875000074 @@ -41234,7 +41234,7 @@ conf473 4.76945454642 0 81.803334075 2.5449988875000074 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf474 3.65816993751 0 81.922499825 2.366250262499996 @@ -41321,7 +41321,7 @@ conf474 3.65816993751 0 81.922499825 2.366250262499996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf475 4.09879818515 0 81.990832925 2.26375061249999 @@ -41408,7 +41408,7 @@ conf475 4.09879818515 0 81.990832925 2.26375061249999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf476 2.73210232591 0 82.45083355 1.573749675000002 @@ -41494,8 +41494,8 @@ conf476 2.73210232591 0 82.45083355 1.573749675000002 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf477 4.06037262592 0 82.60000015 1.3499997750000006 @@ -41582,7 +41582,7 @@ conf477 4.06037262592 0 82.60000015 1.3499997750000006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf478 5.07051186181 0 81.739166625 2.6412500625000064 @@ -41669,7 +41669,7 @@ conf478 5.07051186181 0 81.739166625 2.6412500625000064 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf479 3.01031517084 0 82.71333375 1.1799993749999942 @@ -41756,7 +41756,7 @@ conf479 3.01031517084 0 82.71333375 1.1799993749999942 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf480 3.87290970268 0 81.804999925 2.5425001124999937 @@ -41843,7 +41843,7 @@ conf480 3.87290970268 0 81.804999925 2.5425001124999937 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf481 4.86973251263 0 81.81166685 2.532499725000008 @@ -41930,7 +41930,7 @@ conf481 4.86973251263 0 81.81166685 2.532499725000008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf482 3.51653119594 0 81.8316666 2.502500099999992 @@ -42017,7 +42017,7 @@ conf482 3.51653119594 0 81.8316666 2.502500099999992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf483 4.76565752879 0 82.025833275 2.211250087500005 @@ -42104,7 +42104,7 @@ conf483 4.76565752879 0 82.025833275 2.211250087500005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf484 3.92713555542 0 82.46750015 1.5487497749999903 @@ -42191,7 +42191,7 @@ conf484 3.92713555542 0 82.46750015 1.5487497749999903 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf485 3.33947996697 0 81.79000075 2.5649988749999935 @@ -42278,7 +42278,7 @@ conf485 3.33947996697 0 81.79000075 2.5649988749999935 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf486 4.09863782556 0 81.818332875 2.5225006875000062 @@ -42365,7 +42365,7 @@ conf486 4.09863782556 0 81.818332875 2.5225006875000062 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf487 4.04161555772 0 81.9275002 2.358749700000004 @@ -42452,7 +42452,7 @@ conf487 4.04161555772 0 81.9275002 2.358749700000004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf488 4.23899755592 0 83.095833225 0.6062501624999896 @@ -42539,7 +42539,7 @@ conf488 4.23899755592 0 83.095833225 0.6062501624999896 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf489 4.21529129812 0 81.94083375 2.338749375000006 @@ -42626,7 +42626,7 @@ conf489 4.21529129812 0 81.94083375 2.338749375000006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf490 3.12902985531 0 82.105832875 2.0912506874999934 @@ -42713,7 +42713,7 @@ conf490 3.12902985531 0 82.105832875 2.0912506874999934 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf491 3.81525081626 0 82.14499985 2.0325002249999926 @@ -42800,7 +42800,7 @@ conf491 3.81525081626 0 82.14499985 2.0325002249999926 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf492 4.69328240901 0 82.21833235 1.9225014750000042 @@ -42887,7 +42887,7 @@ conf492 4.69328240901 0 82.21833235 1.9225014750000042 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf493 4.57429034144 0 81.726666275 2.660000587500001 @@ -42974,7 +42974,7 @@ conf493 4.57429034144 0 81.726666275 2.660000587500001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf494 3.53251693748 0 82.770834525 1.093748212500003 @@ -43061,7 +43061,7 @@ conf494 3.53251693748 0 82.770834525 1.093748212500003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf495 3.33699282089 0 81.874166325 2.4387505124999933 @@ -43148,7 +43148,7 @@ conf495 3.33699282089 0 81.874166325 2.4387505124999933 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf496 4.98594477843 0 81.90500035 2.392499475000008 @@ -43235,7 +43235,7 @@ conf496 4.98594477843 0 81.90500035 2.392499475000008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf497 5.05585499751 0 81.8108343 2.5337485500000057 @@ -43322,7 +43322,7 @@ conf497 5.05585499751 0 81.8108343 2.5337485500000057 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf498 3.81085313912 0 83.08583285 0.6212507249999959 @@ -43409,7 +43409,7 @@ conf498 3.81085313912 0 83.08583285 0.6212507249999959 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf499 3.85566564226 0 82.78166705 1.0774994250000063 @@ -43496,7 +43496,7 @@ conf499 3.85566564226 0 82.78166705 1.0774994250000063 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf500 2.78863101683 0 82.47416575 1.538751375000004 @@ -43583,7 +43583,7 @@ conf500 2.78863101683 0 82.47416575 1.538751375000004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf501 3.89416305607 0 82.285833575 1.821249637500003 @@ -43670,7 +43670,7 @@ conf501 3.89416305607 0 82.285833575 1.821249637500003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf502 3.41574390102 0 81.67750045 2.733749325000005 @@ -43757,7 +43757,7 @@ conf502 3.41574390102 0 81.67750045 2.733749325000005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf503 5.00601243119 0 81.890833675 2.413749487500006 @@ -43844,7 +43844,7 @@ conf503 5.00601243119 0 81.890833675 2.413749487500006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf504 4.87728360031 0 82.02666715 2.2099992750000084 @@ -43931,7 +43931,7 @@ conf504 4.87728360031 0 82.02666715 2.2099992750000084 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf505 3.83516693034 0 81.775833725 2.5862494125000026 @@ -44018,7 +44018,7 @@ conf505 3.83516693034 0 81.775833725 2.5862494125000026 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf506 3.04513115025 0 81.89833395 2.4024990750000086 @@ -44105,7 +44105,7 @@ conf506 3.04513115025 0 81.89833395 2.4024990750000086 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf507 3.27740159403 0 83.629166975 0.27083302499999606 @@ -44192,7 +44192,7 @@ conf507 3.27740159403 0 83.629166975 0.27083302499999606 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf508 2.97964104412 0 83.084999975 0.6225000374999965 @@ -44279,7 +44279,7 @@ conf508 2.97964104412 0 83.084999975 0.6225000374999965 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf509 3.77716291357 0 81.911666675 2.3824999874999904 @@ -44366,7 +44366,7 @@ conf509 3.77716291357 0 81.911666675 2.3824999874999904 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf510 4.24405840115 0 82.6800002 1.2299997000000076 @@ -44453,7 +44453,7 @@ conf510 4.24405840115 0 82.6800002 1.2299997000000076 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf511 4.68571881098 0 81.760000475 2.6099992875000027 @@ -44540,7 +44540,7 @@ conf511 4.68571881098 0 81.760000475 2.6099992875000027 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf512 3.9278405063 0 82.911666325 0.8825005125000018 @@ -44627,7 +44627,7 @@ conf512 3.9278405063 0 82.911666325 0.8825005125000018 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf513 4.42742255206 0 82.160001325 2.009998012500006 @@ -44714,7 +44714,7 @@ conf513 4.42742255206 0 82.160001325 2.009998012500006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf514 2.2531970246 0 83.803334025 0.09666597500000196 @@ -44801,7 +44801,7 @@ conf514 2.2531970246 0 83.803334025 0.09666597500000196 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf515 3.54259695095 0 82.997500875 0.7537486874999999 @@ -44888,7 +44888,7 @@ conf515 3.54259695095 0 82.997500875 0.7537486874999999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf516 3.67938585947 0 82.31833355 1.7724996749999917 @@ -44975,7 +44975,7 @@ conf516 3.67938585947 0 82.31833355 1.7724996749999917 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf517 4.48970416384 0 82.0241662 2.2137507000000056 @@ -45062,7 +45062,7 @@ conf517 4.48970416384 0 82.0241662 2.2137507000000056 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf518 4.0237882271 0 82.910833 0.883750500000005 @@ -45149,7 +45149,7 @@ conf518 4.0237882271 0 82.910833 0.883750500000005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf519 3.18632939435 0 83.188333525 0.46749971249999334 @@ -45236,7 +45236,7 @@ conf519 3.18632939435 0 83.188333525 0.46749971249999334 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf520 2.77897001674 0 81.881666625 2.427500062500009 @@ -45322,8 +45322,8 @@ conf520 2.77897001674 0 81.881666625 2.427500062500009 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf521 4.20578873035 0 82.80249995 1.0462500750000032 @@ -45410,7 +45410,7 @@ conf521 4.20578873035 0 82.80249995 1.0462500750000032 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf522 4.47216574518 0 82.543333225 1.4350001625000033 @@ -45497,7 +45497,7 @@ conf522 4.47216574518 0 82.543333225 1.4350001625000033 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf523 4.03270322354 0 82.848334075 0.9774988875000048 @@ -45584,7 +45584,7 @@ conf523 4.03270322354 0 82.848334075 0.9774988875000048 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf524 3.85260446504 0 81.972500575 2.291249137500003 @@ -45671,7 +45671,7 @@ conf524 3.85260446504 0 81.972500575 2.291249137500003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf525 4.24365616951 0 81.941666425 2.337500362500009 @@ -45758,7 +45758,7 @@ conf525 4.24365616951 0 81.941666425 2.337500362500009 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf526 3.15772099861 0 82.219999575 1.9200006374999958 @@ -45845,7 +45845,7 @@ conf526 3.15772099861 0 82.219999575 1.9200006374999958 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf527 4.88394416168 0 81.840834275 2.488748587499991 @@ -45932,7 +45932,7 @@ conf527 4.88394416168 0 81.840834275 2.488748587499991 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf528 4.74545455369 0 81.81916545 2.5212518250000002 @@ -46019,7 +46019,7 @@ conf528 4.74545455369 0 81.81916545 2.5212518250000002 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf529 4.02581419033 0 82.919167075 0.8712493874999936 @@ -46106,7 +46106,7 @@ conf529 4.02581419033 0 82.919167075 0.8712493874999936 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf530 3.7399213034 0 82.860833125 0.9587503125000012 @@ -46193,7 +46193,7 @@ conf530 3.7399213034 0 82.860833125 0.9587503125000012 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf531 4.15582335083 0 82.01916625 2.2212506250000033 @@ -46280,7 +46280,7 @@ conf531 4.15582335083 0 82.01916625 2.2212506250000033 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf532 4.03161391442 0 82.91583395 0.8762490749999898 @@ -46367,7 +46367,7 @@ conf532 4.03161391442 0 82.91583395 0.8762490749999898 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf533 3.24015051874 0 82.601667225 1.3474991625000001 @@ -46454,7 +46454,7 @@ conf533 3.24015051874 0 82.601667225 1.3474991625000001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf534 3.11332567715 0 83.209166525 0.6908334749999995 @@ -46541,7 +46541,7 @@ conf534 3.11332567715 0 83.209166525 0.6908334749999995 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf535 3.3212018141 0 81.8266666 2.5100001000000063 @@ -46628,7 +46628,7 @@ conf535 3.3212018141 0 81.8266666 2.5100001000000063 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf536 4.51237537924 0 81.899999425 2.4000008624999936 @@ -46715,7 +46715,7 @@ conf536 4.51237537924 0 81.899999425 2.4000008624999936 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf537 3.98767584708 0 81.961666825 2.307499762500008 @@ -46802,7 +46802,7 @@ conf537 3.98767584708 0 81.961666825 2.307499762500008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf538 5.00410258214 0 82.533334125 1.4499988125000058 @@ -46889,7 +46889,7 @@ conf538 5.00410258214 0 82.533334125 1.4499988125000058 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf539 5.00601243119 0 81.802500375 2.546249437500009 @@ -46976,7 +46976,7 @@ conf539 5.00601243119 0 81.802500375 2.546249437500009 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf540 4.06015676293 0 82.054166 2.1687510000000074 @@ -47063,7 +47063,7 @@ conf540 4.06015676293 0 82.054166 2.1687510000000074 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf541 3.38502379875 0 81.874166325 2.4387505124999933 @@ -47150,7 +47150,7 @@ conf541 3.38502379875 0 81.874166325 2.4387505124999933 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf542 3.90284443656 0 81.7308337 2.6537494499999923 @@ -47237,7 +47237,7 @@ conf542 3.90284443656 0 81.7308337 2.6537494499999923 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf543 5.15412314878 0 81.966666975 2.299999537500007 @@ -47324,7 +47324,7 @@ conf543 5.15412314878 0 81.966666975 2.299999537500007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf544 4.27959730528 0 82.004999375 2.2425009375000045 @@ -47411,7 +47411,7 @@ conf544 4.27959730528 0 82.004999375 2.2425009375000045 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf545 4.99583458487 0 81.974999475 2.2875007874999937 @@ -47498,7 +47498,7 @@ conf545 4.99583458487 0 81.974999475 2.2875007874999937 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf546 3.57027564494 0 82.9716665 0.7925002500000033 @@ -47585,7 +47585,7 @@ conf546 3.57027564494 0 82.9716665 0.7925002500000033 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf547 4.34499447425 0 82.2008329 1.948750650000008 @@ -47672,7 +47672,7 @@ conf547 4.34499447425 0 82.2008329 1.948750650000008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf548 4.06059199232 0 82.365832725 1.7012509124999937 @@ -47759,7 +47759,7 @@ conf548 4.06059199232 0 82.365832725 1.7012509124999937 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf549 3.45561350445 0 82.3308338 1.7537493000000097 @@ -47846,7 +47846,7 @@ conf549 3.45561350445 0 82.3308338 1.7537493000000097 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf550 2.94141961728 0 82.971665775 0.7925013374999921 @@ -47932,8 +47932,8 @@ conf550 2.94141961728 0 82.971665775 0.7925013374999921 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf551 4.51544520396 0 82.679166975 1.2312495374999983 @@ -48020,7 +48020,7 @@ conf551 4.51544520396 0 82.679166975 1.2312495374999983 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf552 4.68359307017 0 82.344166025 1.7337509624999896 @@ -48107,7 +48107,7 @@ conf552 4.68359307017 0 82.344166025 1.7337509624999896 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf553 4.30341721272 0 82.4325004 1.6012494000000075 @@ -48194,7 +48194,7 @@ conf553 4.30341721272 0 82.4325004 1.6012494000000075 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf554 3.65611150668 0 81.95416715 2.3187492749999947 @@ -48281,7 +48281,7 @@ conf554 3.65611150668 0 81.95416715 2.3187492749999947 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf555 5.01823073429 0 82.128332675 2.0575009875000063 @@ -48368,7 +48368,7 @@ conf555 5.01823073429 0 82.128332675 2.0575009875000063 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf556 3.43858984447 0 82.024166675 2.2137499874999946 @@ -48455,7 +48455,7 @@ conf556 3.43858984447 0 82.024166675 2.2137499874999946 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf557 3.46491952349 0 82.098334025 2.1024989625000003 @@ -48542,7 +48542,7 @@ conf557 3.46491952349 0 82.098334025 2.1024989625000003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf558 2.62458204106 0 82.0374996 2.193750599999994 @@ -48628,8 +48628,8 @@ conf558 2.62458204106 0 82.0374996 2.193750599999994 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf559 5.62466058171 0 81.97000035 2.2949994749999902 @@ -48716,7 +48716,7 @@ conf559 5.62466058171 0 81.97000035 2.2949994749999902 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf560 4.44108442305 0 81.831666925 2.50249961250001 @@ -48803,7 +48803,7 @@ conf560 4.44108442305 0 81.831666925 2.50249961250001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf561 3.32073108715 0 82.32749915 1.7587512750000087 @@ -48890,7 +48890,7 @@ conf561 3.32073108715 0 82.32749915 1.7587512750000087 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf562 3.44651637411 0 82.27833265 1.8325010250000062 @@ -48977,7 +48977,7 @@ conf562 3.44651637411 0 82.27833265 1.8325010250000062 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf563 4.11964833996 0 82.06999995 2.145000075000006 @@ -49064,7 +49064,7 @@ conf563 4.11964833996 0 82.06999995 2.145000075000006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf564 4.29730421963 0 82.120000225 2.0699996625000026 @@ -49151,7 +49151,7 @@ conf564 4.29730421963 0 82.120000225 2.0699996625000026 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf565 4.49671370328 0 81.885832575 2.4212511375000076 @@ -49238,7 +49238,7 @@ conf565 4.49671370328 0 81.885832575 2.4212511375000076 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf566 3.64806882447 0 82.565833075 1.4012503874999993 @@ -49325,7 +49325,7 @@ conf566 3.64806882447 0 82.565833075 1.4012503874999993 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf567 3.78628368244 0 83.36999895 0.5300010500000042 @@ -49412,7 +49412,7 @@ conf567 3.78628368244 0 83.36999895 0.5300010500000042 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf568 4.0330664574 0 81.8975002 2.403749700000006 @@ -49499,7 +49499,7 @@ conf568 4.0330664574 0 81.8975002 2.403749700000006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf569 4.82407585533 0 81.817499575 2.523750637500001 @@ -49586,7 +49586,7 @@ conf569 4.82407585533 0 81.817499575 2.523750637500001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf570 3.56430532596 0 82.4450007 1.5824989500000086 @@ -49673,7 +49673,7 @@ conf570 3.56430532596 0 82.4450007 1.5824989500000086 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf571 2.64409929194 0 82.526667025 1.459999462500008 @@ -49760,7 +49760,7 @@ conf571 2.64409929194 0 82.526667025 1.459999462500008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf572 3.874250119 0 82.89750005 0.9037499249999925 @@ -49847,7 +49847,7 @@ conf572 3.874250119 0 82.89750005 0.9037499249999925 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf573 3.74268230599 0 82.4050009 1.6424986499999932 @@ -49934,7 +49934,7 @@ conf573 3.74268230599 0 82.4050009 1.6424986499999932 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf574 3.93122481277 0 82.005000575 2.2424991375000047 @@ -50021,7 +50021,7 @@ conf574 3.93122481277 0 82.005000575 2.2424991375000047 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf575 2.89061353654 0 82.73666665 1.1450000249999945 @@ -50108,7 +50108,7 @@ conf575 2.89061353654 0 82.73666665 1.1450000249999945 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf576 4.9989727785 0 81.98416615 2.2737507750000105 @@ -50195,7 +50195,7 @@ conf576 4.9989727785 0 81.98416615 2.2737507750000105 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf577 4.52118810973 0 81.9933335 2.2599997499999915 @@ -50282,7 +50282,7 @@ conf577 4.52118810973 0 81.9933335 2.2599997499999915 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf578 5.00825214635 0 81.7708328 2.5937508000000093 @@ -50369,7 +50369,7 @@ conf578 5.00825214635 0 81.7708328 2.5937508000000093 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf579 4.15028298661 0 81.84333405 2.4849989250000064 @@ -50456,7 +50456,7 @@ conf579 4.15028298661 0 81.84333405 2.4849989250000064 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf580 4.49357716945 0 81.928334325 2.3574985125000083 @@ -50543,7 +50543,7 @@ conf580 4.49357716945 0 81.928334325 2.3574985125000083 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf581 2.83911511966 0 82.05833345 2.1624998249999905 @@ -50630,7 +50630,7 @@ conf581 2.83911511966 0 82.05833345 2.1624998249999905 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf582 4.31584656917 0 82.086666425 2.1200003624999937 @@ -50717,7 +50717,7 @@ conf582 4.31584656917 0 82.086666425 2.1200003624999937 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf583 4.69131549213 0 82.176665525 1.9850017124999937 @@ -50804,7 +50804,7 @@ conf583 4.69131549213 0 82.176665525 1.9850017124999937 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf584 4.01106820503 0 82.935000225 0.847499662500006 @@ -50891,7 +50891,7 @@ conf584 4.01106820503 0 82.935000225 0.847499662500006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf585 5.01598208729 0 82.149167275 2.0262490875000054 @@ -50978,7 +50978,7 @@ conf585 5.01598208729 0 82.149167275 2.0262490875000054 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf586 4.19877736533 0 81.972500125 2.2912498125000056 @@ -51065,7 +51065,7 @@ conf586 4.19877736533 0 81.972500125 2.2912498125000056 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf587 4.81630390577 0 82.220833625 1.918749562500004 @@ -51152,7 +51152,7 @@ conf587 4.81630390577 0 82.220833625 1.918749562500004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf588 4.2365832916 0 81.91916675 2.3712498749999966 @@ -51239,7 +51239,7 @@ conf588 4.2365832916 0 81.91916675 2.3712498749999966 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf589 3.42922931648 0 83.70000065 0.19999934999999314 @@ -51326,7 +51326,7 @@ conf589 3.42922931648 0 83.70000065 0.19999934999999314 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf590 2.53227309221 0 82.0049996 2.2425005999999925 @@ -51413,7 +51413,7 @@ conf590 2.53227309221 0 82.0049996 2.2425005999999925 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf591 4.4917740488 0 81.94333375 2.3349993750000095 @@ -51500,7 +51500,7 @@ conf591 4.4917740488 0 81.94333375 2.3349993750000095 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf592 4.69406963557 0 81.838333525 2.492499712500006 @@ -51587,7 +51587,7 @@ conf592 4.69406963557 0 81.838333525 2.492499712500006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf593 3.56282491443 0 82.051666825 2.172499762500003 @@ -51674,7 +51674,7 @@ conf593 3.56282491443 0 82.051666825 2.172499762500003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf594 4.87132189313 0 81.782499775 2.5762503374999923 @@ -51761,7 +51761,7 @@ conf594 4.87132189313 0 81.782499775 2.5762503374999923 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf595 3.10819732949 0 82.05833335 2.162499975000003 @@ -51848,7 +51848,7 @@ conf595 3.10819732949 0 82.05833335 2.162499975000003 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf596 4.99706829606 0 82.86166745 0.9574988250000018 @@ -51935,7 +51935,7 @@ conf596 4.99706829606 0 82.86166745 0.9574988250000018 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf597 3.94968328605 0 82.691667675 1.2124984874999925 @@ -52022,7 +52022,7 @@ conf597 3.94968328605 0 82.691667675 1.2124984874999925 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf598 4.39324961328 0 81.96083335 2.308749974999998 @@ -52109,7 +52109,7 @@ conf598 4.39324961328 0 81.96083335 2.308749974999998 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf599 4.70369074205 0 81.83666635 2.4950004749999977 @@ -52196,7 +52196,7 @@ conf599 4.70369074205 0 81.83666635 2.4950004749999977 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf600 4.24244993192 0 81.939167525 2.341248712499997 @@ -52283,7 +52283,7 @@ conf600 4.24244993192 0 81.939167525 2.341248712499997 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf601 4.20546001496 0 82.089999825 2.11500026249999 @@ -52370,7 +52370,7 @@ conf601 4.20546001496 0 82.089999825 2.11500026249999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf602 4.0330664574 0 82.9633338 0.8049992999999986 @@ -52457,7 +52457,7 @@ conf602 4.0330664574 0 82.9633338 0.8049992999999986 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf603 4.59592944187 0 82.144167675 2.0337484874999916 @@ -52544,7 +52544,7 @@ conf603 4.59592944187 0 82.144167675 2.0337484874999916 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf604 2.74962620899 0 81.7833324 2.575001399999991 @@ -52631,7 +52631,7 @@ conf604 2.74962620899 0 81.7833324 2.575001399999991 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf605 2.7261161414 0 81.790833225 2.5637501625 @@ -52717,8 +52717,8 @@ conf605 2.7261161414 0 81.790833225 2.5637501625 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf606 4.30521903722 0 82.427500575 1.6087491375000056 @@ -52805,7 +52805,7 @@ conf606 4.30521903722 0 82.427500575 1.6087491375000056 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf607 4.69411338148 0 82.265 1.8524999999999991 @@ -52892,7 +52892,7 @@ conf607 4.69411338148 0 82.265 1.8524999999999991 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf608 3.64770629311 0 82.133333775 2.0499993375000045 @@ -52979,7 +52979,7 @@ conf608 3.64770629311 0 82.133333775 2.0499993375000045 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf609 5.03176506899 0 81.738333375 2.6424999375000056 @@ -53066,7 +53066,7 @@ conf609 5.03176506899 0 81.738333375 2.6424999375000056 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf610 4.46122236316 0 81.85166645 2.4725003250000057 @@ -53153,7 +53153,7 @@ conf610 4.46122236316 0 81.85166645 2.4725003250000057 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf611 3.89068717692 0 81.937500575 2.343749137499998 @@ -53240,7 +53240,7 @@ conf611 3.89068717692 0 81.937500575 2.343749137499998 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf612 3.99387473859 0 82.202499525 1.9462507125000101 @@ -53327,7 +53327,7 @@ conf612 3.99387473859 0 82.202499525 1.9462507125000101 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf613 3.51596353468 0 82.24333365 1.884999525000005 @@ -53414,7 +53414,7 @@ conf613 3.51596353468 0 82.24333365 1.884999525000005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf614 4.88809103203 0 82.298333175 1.8025002375000057 @@ -53501,7 +53501,7 @@ conf614 4.88809103203 0 82.298333175 1.8025002375000057 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf615 4.99851631106 0 82.544166525 1.4337502125000086 @@ -53588,7 +53588,7 @@ conf615 4.99851631106 0 82.544166525 1.4337502125000086 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf616 3.99849182962 0 81.765833075 2.601250387499995 @@ -53675,7 +53675,7 @@ conf616 3.99849182962 0 81.765833075 2.601250387499995 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf617 4.47314674003 0 81.826666125 2.510000812499996 @@ -53762,7 +53762,7 @@ conf617 4.47314674003 0 81.826666125 2.510000812499996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf618 4.69131549213 0 82.2766662 1.835000700000009 @@ -53849,7 +53849,7 @@ conf618 4.69131549213 0 82.2766662 1.835000700000009 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf619 3.71849354844 0 82.504166975 1.493749537499994 @@ -53936,7 +53936,7 @@ conf619 3.71849354844 0 82.504166975 1.493749537499994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf620 3.6068179661 0 82.394165275 1.658752087499991 @@ -54023,7 +54023,7 @@ conf620 3.6068179661 0 82.394165275 1.658752087499991 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf621 5.07280967677 0 81.827499475 2.5087507875000057 @@ -54110,7 +54110,7 @@ conf621 5.07280967677 0 81.827499475 2.5087507875000057 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf622 4.5820712323 0 82.16916735 1.9962489750000074 @@ -54197,7 +54197,7 @@ conf622 4.5820712323 0 82.16916735 1.9962489750000074 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf623 3.25270396937 0 81.772500575 2.5912491375000073 @@ -54284,7 +54284,7 @@ conf623 3.25270396937 0 81.772500575 2.5912491375000073 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf624 4.37735991888 0 81.857500375 2.463749437499999 @@ -54371,7 +54371,7 @@ conf624 4.37735991888 0 81.857500375 2.463749437499999 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf625 2.93048346993 0 82.26666605 1.8500009250000033 @@ -54457,8 +54457,8 @@ conf625 2.93048346993 0 82.26666605 1.8500009250000033 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf626 3.91585561438 0 82.0924993 2.11125105 @@ -54545,7 +54545,7 @@ conf626 3.91585561438 0 82.0924993 2.11125105 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf627 3.58191688017 0 81.8816664 2.4275003999999996 @@ -54632,7 +54632,7 @@ conf627 3.58191688017 0 81.8816664 2.4275003999999996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf628 3.69111789575 0 81.857500775 2.463748837499992 @@ -54719,7 +54719,7 @@ conf628 3.69111789575 0 81.857500775 2.463748837499992 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf629 4.98532796694 0 82.237499425 1.8937508625000063 @@ -54806,7 +54806,7 @@ conf629 4.98532796694 0 82.237499425 1.8937508625000063 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf630 3.04637421912 0 82.053333125 2.170000312500008 @@ -54893,7 +54893,7 @@ conf630 3.04637421912 0 82.053333125 2.170000312500008 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf631 4.71695949798 0 81.719166375 2.6712504374999995 @@ -54980,7 +54980,7 @@ conf631 4.71695949798 0 81.719166375 2.6712504374999995 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf632 3.3040487529 0 82.118333475 2.0724997875 @@ -55067,7 +55067,7 @@ conf632 3.3040487529 0 82.118333475 2.0724997875 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf633 3.55525492477 0 82.164166925 2.0037496124999947 @@ -55154,7 +55154,7 @@ conf633 3.55525492477 0 82.164166925 2.0037496124999947 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf634 5.06558104047 0 81.875000175 2.437499737500005 @@ -55241,7 +55241,7 @@ conf634 5.06558104047 0 81.875000175 2.437499737500005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf635 4.66472816961 0 81.75083355 2.623749675000006 @@ -55328,7 +55328,7 @@ conf635 4.66472816961 0 81.75083355 2.623749675000006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf636 4.51289232449 0 82.4666666 1.5500001000000054 @@ -55415,7 +55415,7 @@ conf636 4.51289232449 0 82.4666666 1.5500001000000054 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf637 4.7098698544 0 81.975833275 2.286250087500001 @@ -55502,7 +55502,7 @@ conf637 4.7098698544 0 81.975833275 2.286250087500001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf638 4.71944536449 0 82.164167225 2.0037491625 @@ -55589,7 +55589,7 @@ conf638 4.71944536449 0 82.164167225 2.0037491625 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf639 4.67701094973 0 81.6924997 2.7112504500000014 @@ -55676,7 +55676,7 @@ conf639 4.67701094973 0 81.6924997 2.7112504500000014 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf640 4.43958123972 0 81.784167025 2.573749462499997 @@ -55763,7 +55763,7 @@ conf640 4.43958123972 0 81.784167025 2.573749462499997 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf641 4.23190538371 0 82.6116663 1.332500550000006 @@ -55850,7 +55850,7 @@ conf641 4.23190538371 0 82.6116663 1.332500550000006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf642 3.98767584708 0 82.004999975 2.242500037499994 @@ -55937,7 +55937,7 @@ conf642 3.98767584708 0 82.004999975 2.242500037499994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf643 3.11555935645 0 82.75750025 1.1137496249999899 @@ -56024,7 +56024,7 @@ conf643 3.11555935645 0 82.75750025 1.1137496249999899 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf644 4.02987224334 0 83.018333125 0.7225003125000029 @@ -56111,7 +56111,7 @@ conf644 4.02987224334 0 83.018333125 0.7225003125000029 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf645 2.58909203382 0 83.52916655 0.3708334500000007 @@ -56198,7 +56198,7 @@ conf645 2.58909203382 0 83.52916655 0.3708334500000007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf646 4.00629458587 0 82.601666525 1.3475002125000017 @@ -56285,7 +56285,7 @@ conf646 4.00629458587 0 82.601666525 1.3475002125000017 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf647 2.42728592669 0 81.703333925 2.694999112500007 @@ -56372,7 +56372,7 @@ conf647 2.42728592669 0 81.703333925 2.694999112500007 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf648 4.47795824301 0 81.90416665 2.39375002500001 @@ -56459,7 +56459,7 @@ conf648 4.47795824301 0 81.90416665 2.39375002500001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf649 2.49573043461 0 82.14333415 2.0349987749999983 @@ -56546,7 +56546,7 @@ conf649 2.49573043461 0 82.14333415 2.0349987749999983 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf650 2.26720925728 0 81.742500725 2.636248912500001 @@ -56632,8 +56632,8 @@ conf650 2.26720925728 0 81.742500725 2.636248912500001 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf651 4.95449796363 0 82.10916575 2.0862513749999962 @@ -56720,7 +56720,7 @@ conf651 4.95449796363 0 82.10916575 2.0862513749999962 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf652 5.01766838355 0 82.17749895 1.9837515749999994 @@ -56807,7 +56807,7 @@ conf652 5.01766838355 0 82.17749895 1.9837515749999994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf653 3.07455107832 0 82.376667 1.6849995000000035 @@ -56894,7 +56894,7 @@ conf653 3.07455107832 0 82.376667 1.6849995000000035 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf654 3.62447834496 0 82.0166669 2.224999649999994 @@ -56981,7 +56981,7 @@ conf654 3.62447834496 0 82.0166669 2.224999649999994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf655 4.55261680332 0 82.560000375 1.409999437499998 @@ -57068,7 +57068,7 @@ conf655 4.55261680332 0 82.560000375 1.409999437499998 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf656 2.23022601991 0 82.0675003 2.148749549999991 @@ -57154,8 +57154,8 @@ conf656 2.23022601991 0 82.0675003 2.148749549999991 80 gpu batchnorm fp16 1 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 +83 gpu mul fp16 1 add fp16 1 +84 gpu softmax fp16 1 ----- +++++ conf657 3.93231850502 0 82.142498775 2.0362518374999894 @@ -57242,7 +57242,7 @@ conf657 3.93231850502 0 82.142498775 2.0362518374999894 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf658 2.77983893578 0 82.331666725 1.7524999124999923 @@ -57329,7 +57329,7 @@ conf658 2.77983893578 0 82.331666725 1.7524999124999923 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf659 4.06855184229 0 82.7775 1.0837499999999949 @@ -57416,7 +57416,7 @@ conf659 4.06855184229 0 82.7775 1.0837499999999949 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf660 4.44108442305 0 81.873332925 2.4400006125000004 @@ -57503,7 +57503,7 @@ conf660 4.44108442305 0 81.873332925 2.4400006125000004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 7 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf661 4.08395395995 0 82.532499475 1.4512507875000082 @@ -57590,7 +57590,7 @@ conf661 4.08395395995 0 82.532499475 1.4512507875000082 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf662 5.01261289279 0 82.1675003 1.9987495499999994 @@ -57677,7 +57677,7 @@ conf662 5.01261289279 0 82.1675003 1.9987495499999994 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf663 5.06328976804 0 81.8491659 2.476251149999996 @@ -57764,7 +57764,7 @@ conf663 5.06328976804 0 81.8491659 2.476251149999996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf664 2.92128225673 0 82.2949999 1.8075001500000099 @@ -57851,7 +57851,7 @@ conf664 2.92128225673 0 82.2949999 1.8075001500000099 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf665 3.44170404332 0 82.50833415 1.487498775000006 @@ -57938,7 +57938,7 @@ conf665 3.44170404332 0 82.50833415 1.487498775000006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf666 4.70369074205 0 81.859999675 2.460000487500004 @@ -58025,7 +58025,7 @@ conf666 4.70369074205 0 81.859999675 2.460000487500004 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf667 2.83135171284 0 81.885833175 2.421250237499997 @@ -58112,7 +58112,7 @@ conf667 2.83135171284 0 81.885833175 2.421250237499997 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf668 4.94956890224 0 82.2216673 1.9174990500000106 @@ -58199,7 +58199,7 @@ conf668 4.94956890224 0 82.2216673 1.9174990500000106 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf669 5.55422591101 0 81.986667025 2.269999462499996 @@ -58286,7 +58286,7 @@ conf669 5.55422591101 0 81.986667025 2.269999462499996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf670 4.18183678198 0 81.8800007 2.429998950000005 @@ -58373,7 +58373,7 @@ conf670 4.18183678198 0 81.8800007 2.429998950000005 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf671 4.03161391442 0 81.9174996 2.373750600000001 @@ -58460,7 +58460,7 @@ conf671 4.03161391442 0 81.9174996 2.373750600000001 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf672 3.64732192222 0 82.884166575 0.9237501375000079 @@ -58547,7 +58547,7 @@ conf672 3.64732192222 0 82.884166575 0.9237501375000079 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf673 4.94531711032 0 81.824166075 2.5137508875000094 @@ -58634,7 +58634,7 @@ conf673 4.94531711032 0 81.824166075 2.5137508875000094 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf674 2.81172942223 0 82.6766664 1.235000399999997 @@ -58721,7 +58721,7 @@ conf674 2.81172942223 0 82.6766664 1.235000399999997 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf675 4.29690099725 0 82.072500275 2.141249587500006 @@ -58808,7 +58808,7 @@ conf675 4.29690099725 0 82.072500275 2.141249587500006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf676 5.001248459 0 82.385000975 1.6724985375000045 @@ -58895,7 +58895,7 @@ conf676 5.001248459 0 82.385000975 1.6724985375000045 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf677 4.5933355607 0 82.68166655 1.2275001749999959 @@ -58982,7 +58982,7 @@ conf677 4.5933355607 0 82.68166655 1.2275001749999959 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf678 4.39282818539 0 82.355833475 1.7162497875000042 @@ -59069,7 +59069,7 @@ conf678 4.39282818539 0 82.355833475 1.7162497875000042 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf679 4.56579872358 0 81.7883331 2.567500349999996 @@ -59156,7 +59156,7 @@ conf679 4.56579872358 0 81.7883331 2.567500349999996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 5 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf680 4.60227040005 0 82.21833465 1.922498024999996 @@ -59243,7 +59243,7 @@ conf680 4.60227040005 0 82.21833465 1.922498024999996 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 4 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf681 4.58741246847 0 82.2674998 1.848750300000006 @@ -59330,7 +59330,7 @@ conf681 4.58741246847 0 82.2674998 1.848750300000006 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 6 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- +++++ conf682 4.86127330244 0 81.80916655 2.5362501749999993 @@ -59417,5 +59417,5 @@ conf682 4.86127330244 0 81.80916655 2.5362501749999993 81 gpu relu fp16 1 82 gpu pool_mean fp16 1 83 promise swing_level 3 -84 gpu softmax fp32 1 +84 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet_shallow/data/autotuner_data/tuner_confs_batch220.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet_shallow/data/autotuner_data/tuner_confs_batch220.txt index d85644187b..49958d61f5 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet_shallow/data/autotuner_data/tuner_confs_batch220.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet_shallow/data/autotuner_data/tuner_confs_batch220.txt @@ -85,8 +85,8 @@ conf1 1.5 0 87.919998 0.4100020000000001 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf2 1.5 0 87.919998 0.4100020000000001 @@ -130,8 +130,8 @@ conf2 1.5 0 87.919998 0.4100020000000001 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf3 1.575539289 0 87.259995 1.005007500000005 @@ -175,8 +175,8 @@ conf3 1.575539289 0 87.259995 1.005007500000005 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf4 1.53684197237 0 87.099998 1.2450030000000112 @@ -220,8 +220,8 @@ conf4 1.53684197237 0 87.099998 1.2450030000000112 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf5 1.6287834325 0 86.639999 1.9350015000000056 @@ -265,8 +265,8 @@ conf5 1.6287834325 0 86.639999 1.9350015000000056 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf6 1.5 0 87.919998 0.4100020000000001 @@ -310,8 +310,8 @@ conf6 1.5 0 87.919998 0.4100020000000001 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf7 1.6287834325 0 86.639999 1.9350015000000056 @@ -355,8 +355,8 @@ conf7 1.6287834325 0 86.639999 1.9350015000000056 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf8 1.575539289 0 87.039993 1.335010500000017 @@ -400,8 +400,8 @@ conf8 1.575539289 0 87.039993 1.335010500000017 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf9 1.575539289 0 87.259995 1.005007500000005 @@ -445,8 +445,8 @@ conf9 1.575539289 0 87.259995 1.005007500000005 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf10 1.575539289 0 86.699997 1.845004500000016 @@ -490,8 +490,8 @@ conf10 1.575539289 0 86.699997 1.845004500000016 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf11 1.575539289 0 87.039993 1.335010500000017 @@ -535,8 +535,8 @@ conf11 1.575539289 0 87.039993 1.335010500000017 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf12 1.575539289 0 86.68 1.875 @@ -580,8 +580,8 @@ conf12 1.575539289 0 86.68 1.875 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf13 1.5 0 87.919998 0.4100020000000001 @@ -625,8 +625,8 @@ conf13 1.5 0 87.919998 0.4100020000000001 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf14 1.51532509832 0 86.979996 1.4250060000000104 @@ -670,8 +670,8 @@ conf14 1.51532509832 0 86.979996 1.4250060000000104 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf15 1.575539289 0 87.159996 1.1550060000000002 @@ -715,8 +715,8 @@ conf15 1.575539289 0 87.159996 1.1550060000000002 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf16 1.575539289 0 86.699997 1.845004500000016 @@ -760,8 +760,8 @@ conf16 1.575539289 0 86.699997 1.845004500000016 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf17 1.53684197237 0 86.819992 1.6650120000000115 @@ -805,8 +805,8 @@ conf17 1.53684197237 0 86.819992 1.6650120000000115 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf18 1.6287834325 0 86.680008 1.874988000000009 @@ -850,8 +850,8 @@ conf18 1.6287834325 0 86.680008 1.874988000000009 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf19 1.53684197237 0 87.059998 1.3050030000000206 @@ -895,8 +895,8 @@ conf19 1.53684197237 0 87.059998 1.3050030000000206 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf20 1.575539289 0 86.68 1.875 @@ -940,8 +940,8 @@ conf20 1.575539289 0 86.68 1.875 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf21 1.61623572141 0 86.0 2.8950000000000102 @@ -985,8 +985,8 @@ conf21 1.61623572141 0 86.0 2.8950000000000102 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf22 1.64686885056 0 86.139999 2.6850015000000056 @@ -1030,8 +1030,8 @@ conf22 1.64686885056 0 86.139999 2.6850015000000056 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf23 1.53684197237 0 86.68 1.875 @@ -1075,8 +1075,8 @@ conf23 1.53684197237 0 86.68 1.875 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf24 1.575539289 0 87.259995 1.005007500000005 @@ -1120,8 +1120,8 @@ conf24 1.575539289 0 87.259995 1.005007500000005 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf25 1.59245548594 0 86.440002 2.234997 @@ -1165,8 +1165,8 @@ conf25 1.59245548594 0 86.440002 2.234997 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf26 1.53684197237 0 86.720009 1.8149865000000034 @@ -1210,8 +1210,8 @@ conf26 1.53684197237 0 86.720009 1.8149865000000034 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf27 1.5 0 87.919998 0.4100020000000001 @@ -1255,8 +1255,8 @@ conf27 1.5 0 87.919998 0.4100020000000001 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf28 1.53684197237 0 87.059998 1.3050030000000206 @@ -1300,8 +1300,8 @@ conf28 1.53684197237 0 87.059998 1.3050030000000206 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf29 1.51532509832 0 86.720001 1.8149985000000157 @@ -1345,8 +1345,8 @@ conf29 1.51532509832 0 86.720001 1.8149985000000157 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf30 1.61623572141 0 86.0 2.8950000000000102 @@ -1390,8 +1390,8 @@ conf30 1.61623572141 0 86.0 2.8950000000000102 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf31 1.53684197237 0 86.840004 1.6349940000000203 @@ -1435,8 +1435,8 @@ conf31 1.53684197237 0 86.840004 1.6349940000000203 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf32 1.64686885056 0 86.259995 2.505007500000005 @@ -1480,8 +1480,8 @@ conf32 1.64686885056 0 86.259995 2.505007500000005 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf33 1.575539289 0 87.039993 1.335010500000017 @@ -1525,8 +1525,8 @@ conf33 1.575539289 0 87.039993 1.335010500000017 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf34 1.53684197237 0 87.099998 1.2450030000000112 @@ -1570,8 +1570,8 @@ conf34 1.53684197237 0 87.099998 1.2450030000000112 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf35 1.61623572141 0 86.120003 2.7149955000000148 @@ -1615,8 +1615,8 @@ conf35 1.61623572141 0 86.120003 2.7149955000000148 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf36 1.61623572141 0 86.18 2.625 @@ -1660,8 +1660,8 @@ conf36 1.61623572141 0 86.18 2.625 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf37 1.61623572141 0 86.280006 2.47499100000001 @@ -1705,8 +1705,8 @@ conf37 1.61623572141 0 86.280006 2.47499100000001 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf38 1.6287834325 0 86.639999 1.9350015000000056 @@ -1750,6 +1750,6 @@ conf38 1.6287834325 0 86.639999 1.9350015000000056 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet_shallow/data/autotuner_data/tuner_pareto_confs_batch220.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet_shallow/data/autotuner_data/tuner_pareto_confs_batch220.txt index 1608e8d2f9..1481dbaecd 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet_shallow/data/autotuner_data/tuner_pareto_confs_batch220.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet_shallow/data/autotuner_data/tuner_pareto_confs_batch220.txt @@ -85,8 +85,8 @@ conf1 1.5 0 87.919998 0.4100020000000001 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf2 1.5 0 87.919998 0.4100020000000001 @@ -130,8 +130,8 @@ conf2 1.5 0 87.919998 0.4100020000000001 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf3 1.575539289 0 87.259995 1.005007500000005 @@ -175,8 +175,8 @@ conf3 1.575539289 0 87.259995 1.005007500000005 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf4 1.53684197237 0 87.099998 1.2450030000000112 @@ -220,8 +220,8 @@ conf4 1.53684197237 0 87.099998 1.2450030000000112 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf5 1.6287834325 0 86.639999 1.9350015000000056 @@ -265,8 +265,8 @@ conf5 1.6287834325 0 86.639999 1.9350015000000056 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf6 1.5 0 87.919998 0.4100020000000001 @@ -310,8 +310,8 @@ conf6 1.5 0 87.919998 0.4100020000000001 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf7 1.6287834325 0 86.639999 1.9350015000000056 @@ -355,8 +355,8 @@ conf7 1.6287834325 0 86.639999 1.9350015000000056 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf8 1.575539289 0 87.039993 1.335010500000017 @@ -400,8 +400,8 @@ conf8 1.575539289 0 87.039993 1.335010500000017 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf9 1.575539289 0 87.259995 1.005007500000005 @@ -445,8 +445,8 @@ conf9 1.575539289 0 87.259995 1.005007500000005 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf10 1.575539289 0 87.039993 1.335010500000017 @@ -490,8 +490,8 @@ conf10 1.575539289 0 87.039993 1.335010500000017 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf11 1.5 0 87.919998 0.4100020000000001 @@ -535,8 +535,8 @@ conf11 1.5 0 87.919998 0.4100020000000001 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf12 1.575539289 0 87.159996 1.1550060000000002 @@ -580,8 +580,8 @@ conf12 1.575539289 0 87.159996 1.1550060000000002 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf13 1.53684197237 0 86.819992 1.6650120000000115 @@ -625,8 +625,8 @@ conf13 1.53684197237 0 86.819992 1.6650120000000115 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf14 1.6287834325 0 86.680008 1.874988000000009 @@ -670,8 +670,8 @@ conf14 1.6287834325 0 86.680008 1.874988000000009 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf15 1.53684197237 0 87.059998 1.3050030000000206 @@ -715,8 +715,8 @@ conf15 1.53684197237 0 87.059998 1.3050030000000206 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf16 1.61623572141 0 86.0 2.8950000000000102 @@ -760,8 +760,8 @@ conf16 1.61623572141 0 86.0 2.8950000000000102 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf17 1.64686885056 0 86.139999 2.6850015000000056 @@ -805,8 +805,8 @@ conf17 1.64686885056 0 86.139999 2.6850015000000056 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf18 1.575539289 0 87.259995 1.005007500000005 @@ -850,8 +850,8 @@ conf18 1.575539289 0 87.259995 1.005007500000005 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf19 1.59245548594 0 86.440002 2.234997 @@ -895,8 +895,8 @@ conf19 1.59245548594 0 86.440002 2.234997 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf20 1.5 0 87.919998 0.4100020000000001 @@ -940,8 +940,8 @@ conf20 1.5 0 87.919998 0.4100020000000001 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf21 1.53684197237 0 87.059998 1.3050030000000206 @@ -985,8 +985,8 @@ conf21 1.53684197237 0 87.059998 1.3050030000000206 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf22 1.61623572141 0 86.0 2.8950000000000102 @@ -1030,8 +1030,8 @@ conf22 1.61623572141 0 86.0 2.8950000000000102 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf23 1.53684197237 0 86.840004 1.6349940000000203 @@ -1075,8 +1075,8 @@ conf23 1.53684197237 0 86.840004 1.6349940000000203 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf24 1.64686885056 0 86.259995 2.505007500000005 @@ -1120,8 +1120,8 @@ conf24 1.64686885056 0 86.259995 2.505007500000005 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf25 1.575539289 0 87.039993 1.335010500000017 @@ -1165,8 +1165,8 @@ conf25 1.575539289 0 87.039993 1.335010500000017 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf26 1.53684197237 0 87.099998 1.2450030000000112 @@ -1210,8 +1210,8 @@ conf26 1.53684197237 0 87.099998 1.2450030000000112 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf27 1.61623572141 0 86.120003 2.7149955000000148 @@ -1255,8 +1255,8 @@ conf27 1.61623572141 0 86.120003 2.7149955000000148 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf28 1.61623572141 0 86.18 2.625 @@ -1300,8 +1300,8 @@ conf28 1.61623572141 0 86.18 2.625 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf29 1.61623572141 0 86.280006 2.47499100000001 @@ -1345,8 +1345,8 @@ conf29 1.61623572141 0 86.280006 2.47499100000001 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf30 1.6287834325 0 86.639999 1.9350015000000056 @@ -1390,6 +1390,6 @@ conf30 1.6287834325 0 86.639999 1.9350015000000056 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet_shallow/data/autotuner_data/tuner_promise_confs_batch220_multi.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet_shallow/data/autotuner_data/tuner_promise_confs_batch220_multi.txt index 022251ec9c..edff080259 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet_shallow/data/autotuner_data/tuner_promise_confs_batch220_multi.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet_shallow/data/autotuner_data/tuner_promise_confs_batch220_multi.txt @@ -85,8 +85,8 @@ conf2 1.53684197237 0 86.833336 1.6449960000000061 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf3 1.575539289 0 87.166664 1.1450040000000143 @@ -130,8 +130,8 @@ conf3 1.575539289 0 87.166664 1.1450040000000143 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf4 1.91497253508 0 86.889999375 1.560000937500007 @@ -175,8 +175,8 @@ conf4 1.91497253508 0 86.889999375 1.560000937500007 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf5 1.86955057246 0 87.236666875 1.0399996875000141 @@ -220,8 +220,8 @@ conf5 1.86955057246 0 87.236666875 1.0399996875000141 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf6 1.575539289 0 86.766663 1.745005500000019 @@ -265,8 +265,8 @@ conf6 1.575539289 0 86.766663 1.745005500000019 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf7 1.82459847832 0 87.2891663 0.9612505500000026 @@ -310,8 +310,8 @@ conf7 1.82459847832 0 87.2891663 0.9612505500000026 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf8 1.76108240404 0 87.29416665 0.9537500250000193 @@ -355,8 +355,8 @@ conf8 1.76108240404 0 87.29416665 0.9537500250000193 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf9 1.85810616915 0 87.267499425 0.9937508625000149 @@ -400,8 +400,8 @@ conf9 1.85810616915 0 87.267499425 0.9937508625000149 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf10 1.92713044929 0 86.775000325 1.73249951250002 @@ -445,8 +445,8 @@ conf10 1.92713044929 0 86.775000325 1.73249951250002 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf11 1.8120835937 0 86.1258333 2.7062500500000155 @@ -490,8 +490,8 @@ conf11 1.8120835937 0 86.1258333 2.7062500500000155 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf12 2.26676790856 0 86.239167575 2.536248637500016 @@ -535,8 +535,8 @@ conf12 2.26676790856 0 86.239167575 2.536248637500016 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf13 1.64686885056 0 86.433342 2.244987000000016 @@ -580,8 +580,8 @@ conf13 1.64686885056 0 86.433342 2.244987000000016 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf14 1.8120835937 0 86.783333525 1.7199997125000053 @@ -625,8 +625,8 @@ conf14 1.8120835937 0 86.783333525 1.7199997125000053 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf15 1.53684197237 0 86.833336 1.6449960000000061 @@ -670,8 +670,8 @@ conf15 1.53684197237 0 86.833336 1.6449960000000061 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf16 1.8120835937 0 86.242499475 2.5312507875000065 @@ -715,8 +715,8 @@ conf16 1.8120835937 0 86.242499475 2.5312507875000065 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf17 1.91497253508 0 86.88666565 1.5650015250000138 @@ -760,8 +760,8 @@ conf17 1.91497253508 0 86.88666565 1.5650015250000138 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf18 1.91497253508 0 86.624999825 1.9575002625000053 @@ -805,8 +805,8 @@ conf18 1.91497253508 0 86.624999825 1.9575002625000053 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf19 1.575539289 0 86.766663 1.745005500000019 @@ -850,8 +850,8 @@ conf19 1.575539289 0 86.766663 1.745005500000019 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf20 1.53684197237 0 86.733337 1.7949945000000014 @@ -895,8 +895,8 @@ conf20 1.53684197237 0 86.733337 1.7949945000000014 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf21 1.91497253508 0 86.063334275 2.7999985875000064 @@ -940,8 +940,8 @@ conf21 1.91497253508 0 86.063334275 2.7999985875000064 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf22 1.85810616915 0 87.24666645 1.0250003250000006 @@ -985,8 +985,8 @@ conf22 1.85810616915 0 87.24666645 1.0250003250000006 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf23 1.5 0 87.700005 0.6299950000000024 @@ -1030,8 +1030,8 @@ conf23 1.5 0 87.700005 0.6299950000000024 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf24 1.76108240404 0 87.311666475 0.9275002875000169 @@ -1075,8 +1075,8 @@ conf24 1.76108240404 0 87.311666475 0.9275002875000169 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf25 1.82459847832 0 87.3283339 0.9024991500000041 @@ -1120,8 +1120,8 @@ conf25 1.82459847832 0 87.3283339 0.9024991500000041 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf26 1.8794021204 0 86.918332925 1.5175006125000081 @@ -1165,8 +1165,8 @@ conf26 1.8794021204 0 86.918332925 1.5175006125000081 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf27 1.86955057246 0 87.284166775 0.9687498375000061 @@ -1210,8 +1210,8 @@ conf27 1.86955057246 0 87.284166775 0.9687498375000061 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf28 1.92713044929 0 86.83166565 1.6475015250000027 @@ -1255,8 +1255,8 @@ conf28 1.92713044929 0 86.83166565 1.6475015250000027 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf29 1.575539289 0 86.666664 1.8950040000000143 @@ -1300,8 +1300,8 @@ conf29 1.575539289 0 86.666664 1.8950040000000143 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf30 1.82459847832 0 87.3241679 0.908748150000001 @@ -1345,8 +1345,8 @@ conf30 1.82459847832 0 87.3241679 0.908748150000001 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf31 2.44970704202 0 86.158333975 2.6574990375000027 @@ -1390,8 +1390,8 @@ conf31 2.44970704202 0 86.158333975 2.6574990375000027 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf32 1.89341712619 0 86.63583385 1.9412492250000142 @@ -1435,8 +1435,8 @@ conf32 1.89341712619 0 86.63583385 1.9412492250000142 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf33 1.92713044929 0 86.372500975 2.336248537500019 @@ -1480,8 +1480,8 @@ conf33 1.92713044929 0 86.372500975 2.336248537500019 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf34 1.84732414374 0 86.6783331 1.8775003500000054 @@ -1525,8 +1525,8 @@ conf34 1.84732414374 0 86.6783331 1.8775003500000054 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf35 1.92713044929 0 86.0374994 2.838750900000008 @@ -1570,8 +1570,8 @@ conf35 1.92713044929 0 86.0374994 2.838750900000008 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf36 2.44970704202 0 86.150000175 2.6699997375000066 @@ -1615,8 +1615,8 @@ conf36 2.44970704202 0 86.150000175 2.6699997375000066 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf37 1.51532509832 0 87.200005 1.0949925000000036 @@ -1660,8 +1660,8 @@ conf37 1.51532509832 0 87.200005 1.0949925000000036 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf38 2.53989918743 0 86.17916675 2.6262498750000205 @@ -1705,8 +1705,8 @@ conf38 2.53989918743 0 86.17916675 2.6262498750000205 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf39 1.92713044929 0 86.79166795 1.7074980750000037 @@ -1750,8 +1750,8 @@ conf39 1.92713044929 0 86.79166795 1.7074980750000037 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf40 2.3805433517 0 86.058333575 2.807499637500001 @@ -1795,8 +1795,8 @@ conf40 2.3805433517 0 86.058333575 2.807499637500001 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf41 1.76108240404 0 87.299999975 0.9450000375000016 @@ -1840,8 +1840,8 @@ conf41 1.76108240404 0 87.299999975 0.9450000375000016 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf42 1.5 0 87.700005 0.6299950000000024 @@ -1885,8 +1885,8 @@ conf42 1.5 0 87.700005 0.6299950000000024 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf43 1.79865007488 0 87.34833265 0.8725010250000054 @@ -1930,8 +1930,8 @@ conf43 1.79865007488 0 87.34833265 0.8725010250000054 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf44 1.85188328957 0 86.9683334 1.4424999000000014 @@ -1975,8 +1975,8 @@ conf44 1.85188328957 0 86.9683334 1.4424999000000014 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf45 1.76108240404 0 87.280833725 0.9737494125000197 @@ -2020,8 +2020,8 @@ conf45 1.76108240404 0 87.280833725 0.9737494125000197 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf46 1.575539289 0 86.666664 1.8950040000000143 @@ -2065,8 +2065,8 @@ conf46 1.575539289 0 86.666664 1.8950040000000143 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf47 1.575539289 0 86.766663 1.745005500000019 @@ -2110,8 +2110,8 @@ conf47 1.575539289 0 86.766663 1.745005500000019 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf48 1.575539289 0 87.166664 1.1450040000000143 @@ -2155,8 +2155,8 @@ conf48 1.575539289 0 87.166664 1.1450040000000143 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf49 1.91497253508 0 86.857499875 1.6087501875000072 @@ -2200,8 +2200,8 @@ conf49 1.91497253508 0 86.857499875 1.6087501875000072 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf50 1.53684197237 0 87.200005 1.0949925000000036 @@ -2245,8 +2245,8 @@ conf50 1.53684197237 0 87.200005 1.0949925000000036 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf51 1.86955057246 0 87.275000075 0.982499887500019 @@ -2290,8 +2290,8 @@ conf51 1.86955057246 0 87.275000075 0.982499887500019 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf52 1.53684197237 0 86.833336 1.6449960000000061 @@ -2335,8 +2335,8 @@ conf52 1.53684197237 0 86.833336 1.6449960000000061 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf53 1.8120835937 0 86.9333332 1.4950001999999998 @@ -2380,8 +2380,8 @@ conf53 1.8120835937 0 86.9333332 1.4950001999999998 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf54 1.79865007488 0 87.27083365 0.98874952500001 @@ -2425,8 +2425,8 @@ conf54 1.79865007488 0 87.27083365 0.98874952500001 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf55 1.92713044929 0 86.77000015 1.7399997750000082 @@ -2470,8 +2470,8 @@ conf55 1.92713044929 0 86.77000015 1.7399997750000082 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf56 1.85810616915 0 87.296666375 0.9500004375000088 @@ -2515,8 +2515,8 @@ conf56 1.85810616915 0 87.296666375 0.9500004375000088 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf57 1.6287834325 0 86.366669 2.3449965000000077 @@ -2560,8 +2560,8 @@ conf57 1.6287834325 0 86.366669 2.3449965000000077 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf58 1.85188328957 0 86.117499325 2.7187510125000145 @@ -2605,8 +2605,8 @@ conf58 1.85188328957 0 86.117499325 2.7187510125000145 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf59 1.86955057246 0 87.32000105 0.9149984250000074 @@ -2650,8 +2650,8 @@ conf59 1.86955057246 0 87.32000105 0.9149984250000074 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf60 1.92713044929 0 86.784165925 1.7187511125000157 @@ -2695,8 +2695,8 @@ conf60 1.92713044929 0 86.784165925 1.7187511125000157 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf61 1.85810616915 0 87.3066651 0.9350023500000049 @@ -2740,8 +2740,8 @@ conf61 1.85810616915 0 87.3066651 0.9350023500000049 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf62 2.56133143265 0 86.082499875 2.7712501875000157 @@ -2785,8 +2785,8 @@ conf62 2.56133143265 0 86.082499875 2.7712501875000157 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf63 1.51532509832 0 87.200005 1.0949925000000036 @@ -2830,8 +2830,8 @@ conf63 1.51532509832 0 87.200005 1.0949925000000036 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf64 1.92713044929 0 86.05083405 2.8187489250000013 @@ -2875,8 +2875,8 @@ conf64 1.92713044929 0 86.05083405 2.8187489250000013 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf65 1.82459847832 0 87.3058339 0.9362491500000161 @@ -2920,8 +2920,8 @@ conf65 1.82459847832 0 87.3058339 0.9362491500000161 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf66 1.76108240404 0 87.304999475 0.9375007875000065 @@ -2965,8 +2965,8 @@ conf66 1.76108240404 0 87.304999475 0.9375007875000065 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf67 1.5 0 87.700005 0.6299950000000024 @@ -3010,8 +3010,8 @@ conf67 1.5 0 87.700005 0.6299950000000024 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf68 1.6287834325 0 86.366669 2.3449965000000077 @@ -3055,8 +3055,8 @@ conf68 1.6287834325 0 86.366669 2.3449965000000077 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf69 1.8794021204 0 86.8958327 1.5512509500000107 @@ -3100,8 +3100,8 @@ conf69 1.8794021204 0 86.8958327 1.5512509500000107 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf70 1.53684197237 0 86.599998 1.9950030000000112 @@ -3145,8 +3145,8 @@ conf70 1.53684197237 0 86.599998 1.9950030000000112 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf71 1.8120835937 0 86.989999975 1.410000037500005 @@ -3190,8 +3190,8 @@ conf71 1.8120835937 0 86.989999975 1.410000037500005 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf72 1.53684197237 0 87.200005 1.0949925000000036 @@ -3235,8 +3235,8 @@ conf72 1.53684197237 0 87.200005 1.0949925000000036 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf73 1.91497253508 0 86.847501625 1.6237475625000002 @@ -3280,8 +3280,8 @@ conf73 1.91497253508 0 86.847501625 1.6237475625000002 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf74 1.92713044929 0 86.81583275 1.6712508750000126 @@ -3325,8 +3325,8 @@ conf74 1.92713044929 0 86.81583275 1.6712508750000126 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf75 1.6287834325 0 86.366669 2.3449965000000077 @@ -3370,8 +3370,8 @@ conf75 1.6287834325 0 86.366669 2.3449965000000077 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf76 1.92713044929 0 86.441666825 2.2324997625000123 @@ -3415,8 +3415,8 @@ conf76 1.92713044929 0 86.441666825 2.2324997625000123 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf77 1.92713044929 0 86.17749975 2.628750375000017 @@ -3460,8 +3460,8 @@ conf77 1.92713044929 0 86.17749975 2.628750375000017 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf78 2.49809300707 0 86.127499 2.70375150000001 @@ -3505,8 +3505,8 @@ conf78 2.49809300707 0 86.127499 2.70375150000001 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf79 1.91497253508 0 86.658334175 1.9074987375000205 @@ -3550,8 +3550,8 @@ conf79 1.91497253508 0 86.658334175 1.9074987375000205 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf80 1.78224422756 0 86.676666275 1.8800005875000068 @@ -3595,8 +3595,8 @@ conf80 1.78224422756 0 86.676666275 1.8800005875000068 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf81 2.44970704202 0 86.089999125 2.760001312500002 @@ -3640,8 +3640,8 @@ conf81 2.44970704202 0 86.089999125 2.760001312500002 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf82 1.575539289 0 87.166664 1.1450040000000143 @@ -3685,8 +3685,8 @@ conf82 1.575539289 0 87.166664 1.1450040000000143 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf83 2.41847778534 0 86.2258335 2.5562497500000205 @@ -3730,8 +3730,8 @@ conf83 2.41847778534 0 86.2258335 2.5562497500000205 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf84 2.44970704202 0 86.255832625 2.5112510625000155 @@ -3775,8 +3775,8 @@ conf84 2.44970704202 0 86.255832625 2.5112510625000155 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf85 2.37309882107 0 86.251666225 2.5175006625000194 @@ -3820,8 +3820,8 @@ conf85 2.37309882107 0 86.251666225 2.5175006625000194 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf86 2.3805433517 0 86.198333375 2.597499937500004 @@ -3865,8 +3865,8 @@ conf86 2.3805433517 0 86.198333375 2.597499937500004 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf87 2.49809300707 0 86.043332925 2.830000612500008 @@ -3910,8 +3910,8 @@ conf87 2.49809300707 0 86.043332925 2.830000612500008 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf88 1.5 0 87.700005 0.6299950000000024 @@ -3955,8 +3955,8 @@ conf88 1.5 0 87.700005 0.6299950000000024 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf89 1.76108240404 0 87.301665775 0.9425013375000049 @@ -4000,8 +4000,8 @@ conf89 1.76108240404 0 87.301665775 0.9425013375000049 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf90 1.79865007488 0 87.2949999 0.9525001500000201 @@ -4045,8 +4045,8 @@ conf90 1.79865007488 0 87.2949999 0.9525001500000201 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf91 1.53684197237 0 87.200005 1.0949925000000036 @@ -4090,8 +4090,8 @@ conf91 1.53684197237 0 87.200005 1.0949925000000036 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf92 1.91497253508 0 86.886665975 1.5650010375000107 @@ -4135,8 +4135,8 @@ conf92 1.91497253508 0 86.886665975 1.5650010375000107 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf93 1.5 0 87.700005 0.6299950000000024 @@ -4180,8 +4180,8 @@ conf93 1.5 0 87.700005 0.6299950000000024 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf94 1.53684197237 0 86.599998 1.9950030000000112 @@ -4225,8 +4225,8 @@ conf94 1.53684197237 0 86.599998 1.9950030000000112 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf95 1.53684197237 0 86.833336 1.6449960000000061 @@ -4270,8 +4270,8 @@ conf95 1.53684197237 0 86.833336 1.6449960000000061 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf96 1.91497253508 0 86.656666625 1.9100000625000106 @@ -4315,8 +4315,8 @@ conf96 1.91497253508 0 86.656666625 1.9100000625000106 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf97 1.85188328957 0 86.419167025 2.2662494625000207 @@ -4360,8 +4360,8 @@ conf97 1.85188328957 0 86.419167025 2.2662494625000207 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf98 2.37309882107 0 86.22833235 2.552501475000007 @@ -4405,8 +4405,8 @@ conf98 2.37309882107 0 86.22833235 2.552501475000007 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf99 2.44970704202 0 86.206666125 2.585000812500013 @@ -4450,8 +4450,8 @@ conf99 2.44970704202 0 86.206666125 2.585000812500013 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf100 2.49809300707 0 86.1725 2.636250000000011 @@ -4495,8 +4495,8 @@ conf100 2.49809300707 0 86.1725 2.636250000000011 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf101 2.43009500005 0 86.2274992 2.553751200000015 @@ -4540,8 +4540,8 @@ conf101 2.43009500005 0 86.2274992 2.553751200000015 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf102 1.8794021204 0 86.936667075 1.4899993875000064 @@ -4585,8 +4585,8 @@ conf102 1.8794021204 0 86.936667075 1.4899993875000064 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf103 1.92713044929 0 86.216666 2.570001000000005 @@ -4630,8 +4630,8 @@ conf103 1.92713044929 0 86.216666 2.570001000000005 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf104 1.8120835937 0 86.196668575 2.5999971375000044 @@ -4675,8 +4675,8 @@ conf104 1.8120835937 0 86.196668575 2.5999971375000044 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf105 1.53684197237 0 86.433327 2.245009500000002 @@ -4720,8 +4720,8 @@ conf105 1.53684197237 0 86.433327 2.245009500000002 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf106 1.5 0 87.700005 0.6299950000000024 @@ -4765,8 +4765,8 @@ conf106 1.5 0 87.700005 0.6299950000000024 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf107 1.8120835937 0 86.568333825 2.042499262500016 @@ -4810,8 +4810,8 @@ conf107 1.8120835937 0 86.568333825 2.042499262500016 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf108 1.53684197237 0 86.599998 1.9950030000000112 @@ -4855,8 +4855,8 @@ conf108 1.53684197237 0 86.599998 1.9950030000000112 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf109 1.53684197237 0 86.833336 1.6449960000000061 @@ -4900,8 +4900,8 @@ conf109 1.53684197237 0 86.833336 1.6449960000000061 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf110 1.92713044929 0 86.35999975 2.35500037500001 @@ -4945,8 +4945,8 @@ conf110 1.92713044929 0 86.35999975 2.35500037500001 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf111 1.92713044929 0 86.162498825 2.65125176250001 @@ -4990,8 +4990,8 @@ conf111 1.92713044929 0 86.162498825 2.65125176250001 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf112 1.53684197237 0 86.599998 1.9950030000000112 @@ -5035,8 +5035,8 @@ conf112 1.53684197237 0 86.599998 1.9950030000000112 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf113 1.92713044929 0 86.401667325 2.292499012500002 @@ -5080,8 +5080,8 @@ conf113 1.92713044929 0 86.401667325 2.292499012500002 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf114 1.82459847832 0 87.32833315 0.9025002750000013 @@ -5125,8 +5125,8 @@ conf114 1.82459847832 0 87.32833315 0.9025002750000013 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf115 1.92713044929 0 86.784167825 1.7187482625000143 @@ -5170,8 +5170,8 @@ conf115 1.92713044929 0 86.784167825 1.7187482625000143 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf116 1.53684197237 0 86.433327 2.245009500000002 @@ -5215,8 +5215,8 @@ conf116 1.53684197237 0 86.433327 2.245009500000002 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf117 1.53684197237 0 87.200005 1.0949925000000036 @@ -5260,8 +5260,8 @@ conf117 1.53684197237 0 87.200005 1.0949925000000036 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf118 1.53684197237 0 86.733337 1.7949945000000014 @@ -5305,8 +5305,8 @@ conf118 1.53684197237 0 86.733337 1.7949945000000014 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf119 1.5 0 87.700005 0.6299950000000024 @@ -5350,8 +5350,8 @@ conf119 1.5 0 87.700005 0.6299950000000024 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf120 1.92713044929 0 86.8250003 1.6574995500000114 @@ -5395,8 +5395,8 @@ conf120 1.92713044929 0 86.8250003 1.6574995500000114 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf121 1.5 0 87.700005 0.6299950000000024 @@ -5440,8 +5440,8 @@ conf121 1.5 0 87.700005 0.6299950000000024 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf122 1.79865007488 0 87.325833725 0.9062494125000171 @@ -5485,8 +5485,8 @@ conf122 1.79865007488 0 87.325833725 0.9062494125000171 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf123 1.53684197237 0 86.833336 1.6449960000000061 @@ -5530,8 +5530,8 @@ conf123 1.53684197237 0 86.833336 1.6449960000000061 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf124 1.8120835937 0 86.9791668 1.426249800000008 @@ -5575,8 +5575,8 @@ conf124 1.8120835937 0 86.9791668 1.426249800000008 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf125 1.85188328957 0 86.9275005 1.5037492500000198 @@ -5620,8 +5620,8 @@ conf125 1.85188328957 0 86.9275005 1.5037492500000198 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf126 1.92713044929 0 86.343332825 2.3800007625000035 @@ -5665,8 +5665,8 @@ conf126 1.92713044929 0 86.343332825 2.3800007625000035 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf127 2.3805433517 0 86.2050007 2.587498950000011 @@ -5710,8 +5710,8 @@ conf127 2.3805433517 0 86.2050007 2.587498950000011 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf128 1.8120835937 0 86.0024996 2.8912506000000207 diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet_shallow/data/autotuner_data/tuner_promise_confs_batch220_single.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet_shallow/data/autotuner_data/tuner_promise_confs_batch220_single.txt index 36dcd7a0b7..26158f5d4f 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet_shallow/data/autotuner_data/tuner_promise_confs_batch220_single.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet_shallow/data/autotuner_data/tuner_promise_confs_batch220_single.txt @@ -85,8 +85,8 @@ conf1 1.5 0 87.700005 0.6299950000000024 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf2 1.85188328957 0 86.93166605 1.4975009250000042 @@ -130,8 +130,8 @@ conf2 1.85188328957 0 86.93166605 1.4975009250000042 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf3 1.53684197237 0 86.599998 1.9950030000000112 @@ -175,8 +175,8 @@ conf3 1.53684197237 0 86.599998 1.9950030000000112 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf4 1.5 0 87.700005 0.6299950000000024 @@ -220,8 +220,8 @@ conf4 1.5 0 87.700005 0.6299950000000024 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf5 1.575539289 0 87.166664 1.1450040000000143 @@ -265,8 +265,8 @@ conf5 1.575539289 0 87.166664 1.1450040000000143 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf6 1.8120835937 0 86.9866658 1.4150013000000143 @@ -310,8 +310,8 @@ conf6 1.8120835937 0 86.9866658 1.4150013000000143 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf7 1.53684197237 0 87.200005 1.0949925000000036 @@ -355,8 +355,8 @@ conf7 1.53684197237 0 87.200005 1.0949925000000036 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf8 1.86955057246 0 87.21583265 1.0712510250000165 @@ -400,8 +400,8 @@ conf8 1.86955057246 0 87.21583265 1.0712510250000165 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf9 1.85810616915 0 87.304165475 0.9387517875000029 @@ -445,8 +445,8 @@ conf9 1.85810616915 0 87.304165475 0.9387517875000029 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf10 1.8794021204 0 86.891667625 1.5574985625000153 @@ -490,8 +490,8 @@ conf10 1.8794021204 0 86.891667625 1.5574985625000153 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf11 1.92713044929 0 86.768333375 1.742499937500014 @@ -535,8 +535,8 @@ conf11 1.92713044929 0 86.768333375 1.742499937500014 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf12 1.76108240404 0 87.2933327 0.95500095000002 @@ -580,8 +580,8 @@ conf12 1.76108240404 0 87.2933327 0.95500095000002 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf13 1.82459847832 0 87.313334375 0.9249984375000153 @@ -625,8 +625,8 @@ conf13 1.82459847832 0 87.313334375 0.9249984375000153 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf14 1.575539289 0 86.666664 1.8950040000000143 @@ -670,8 +670,8 @@ conf14 1.575539289 0 86.666664 1.8950040000000143 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf15 1.91497253508 0 86.847500875 1.6237486875000187 @@ -715,8 +715,8 @@ conf15 1.91497253508 0 86.847500875 1.6237486875000187 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf16 1.575539289 0 86.766663 1.745005500000019 @@ -760,8 +760,8 @@ conf16 1.575539289 0 86.766663 1.745005500000019 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf17 1.53684197237 0 86.833336 1.6449960000000061 @@ -805,8 +805,8 @@ conf17 1.53684197237 0 86.833336 1.6449960000000061 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf18 1.8794021204 0 86.15416655 2.663750175000011 @@ -850,8 +850,8 @@ conf18 1.8794021204 0 86.15416655 2.663750175000011 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf19 1.59245548594 0 86.5 2.1450000000000102 @@ -895,8 +895,8 @@ conf19 1.59245548594 0 86.5 2.1450000000000102 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf20 1.92713044929 0 86.310000175 2.4299997375000117 @@ -940,8 +940,8 @@ conf20 1.92713044929 0 86.310000175 2.4299997375000117 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf21 1.8120835937 0 86.0424991 2.8312513500000094 @@ -985,8 +985,8 @@ conf21 1.8120835937 0 86.0424991 2.8312513500000094 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf22 1.92713044929 0 86.097501 2.748748500000019 @@ -1030,8 +1030,8 @@ conf22 1.92713044929 0 86.097501 2.748748500000019 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf23 1.8120835937 0 86.256665775 2.5100013375000074 @@ -1075,8 +1075,8 @@ conf23 1.8120835937 0 86.256665775 2.5100013375000074 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf24 1.88167957812 0 86.67750015 1.87874977500001 @@ -1120,8 +1120,8 @@ conf24 1.88167957812 0 86.67750015 1.87874977500001 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf25 1.61623572141 0 86.066673 2.7949905000000186 @@ -1165,8 +1165,8 @@ conf25 1.61623572141 0 86.066673 2.7949905000000186 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf26 2.53989918743 0 86.17916635 2.626250475000006 @@ -1210,8 +1210,8 @@ conf26 2.53989918743 0 86.17916635 2.626250475000006 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf27 1.61623572141 0 86.333336 2.394996000000006 @@ -1255,8 +1255,8 @@ conf27 1.61623572141 0 86.333336 2.394996000000006 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf28 1.8794021204 0 86.873333375 1.5849999375000081 @@ -1300,8 +1300,8 @@ conf28 1.8794021204 0 86.873333375 1.5849999375000081 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf29 1.91497253508 0 86.65500045 1.912499325000006 @@ -1345,8 +1345,8 @@ conf29 1.91497253508 0 86.65500045 1.912499325000006 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf30 1.91497253508 0 86.86833325 1.5925001250000008 @@ -1390,8 +1390,8 @@ conf30 1.91497253508 0 86.86833325 1.5925001250000008 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf31 1.91497253508 0 86.270834325 2.488748512500017 @@ -1435,8 +1435,8 @@ conf31 1.91497253508 0 86.270834325 2.488748512500017 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf32 1.85188328957 0 86.74000035 1.7849994750000064 @@ -1480,8 +1480,8 @@ conf32 1.85188328957 0 86.74000035 1.7849994750000064 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf33 1.61623572141 0 85.933327 2.995009500000002 @@ -1525,8 +1525,8 @@ conf33 1.61623572141 0 85.933327 2.995009500000002 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf34 1.61623572141 0 85.900002 3.0449970000000093 @@ -1570,8 +1570,8 @@ conf34 1.61623572141 0 85.900002 3.0449970000000093 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf35 1.85188328957 0 86.0941667 2.7537499500000067 @@ -1615,8 +1615,8 @@ conf35 1.85188328957 0 86.0941667 2.7537499500000067 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf36 1.92713044929 0 86.5866669 2.0149996500000142 @@ -1660,8 +1660,8 @@ conf36 1.92713044929 0 86.5866669 2.0149996500000142 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf37 1.92713044929 0 86.339167225 2.3862491625000146 @@ -1705,8 +1705,8 @@ conf37 1.92713044929 0 86.339167225 2.3862491625000146 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf38 2.20742339042 0 86.2999987 2.4450019500000053 @@ -1750,8 +1750,8 @@ conf38 2.20742339042 0 86.2999987 2.4450019500000053 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf39 1.8794021204 0 86.205 2.587500000000013 @@ -1795,8 +1795,8 @@ conf39 1.8794021204 0 86.205 2.587500000000013 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf40 1.92713044929 0 86.79999965 1.6950005250000046 @@ -1840,8 +1840,8 @@ conf40 1.92713044929 0 86.79999965 1.6950005250000046 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf41 1.86955057246 0 87.21333425 1.0749986250000063 @@ -1885,8 +1885,8 @@ conf41 1.86955057246 0 87.21333425 1.0749986250000063 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf42 2.58312845625 0 86.0200018 2.864997300000006 @@ -1930,8 +1930,8 @@ conf42 2.58312845625 0 86.0200018 2.864997300000006 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf43 1.85188328957 0 86.948332525 1.4725012125000134 @@ -1975,8 +1975,8 @@ conf43 1.85188328957 0 86.948332525 1.4725012125000134 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf44 1.8120835937 0 86.98166695 1.4224995750000033 @@ -2020,8 +2020,8 @@ conf44 1.8120835937 0 86.98166695 1.4224995750000033 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf45 2.3805433517 0 86.05916625 2.806250625000004 @@ -2065,8 +2065,8 @@ conf45 2.3805433517 0 86.05916625 2.806250625000004 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf46 2.32939142346 0 86.2841656 2.4687516000000187 @@ -2110,8 +2110,8 @@ conf46 2.32939142346 0 86.2841656 2.4687516000000187 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf47 1.92713044929 0 86.3524993 2.3662510500000025 @@ -2155,8 +2155,8 @@ conf47 1.92713044929 0 86.3524993 2.3662510500000025 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf48 2.49809300707 0 86.02583295 2.8562505750000184 @@ -2200,8 +2200,8 @@ conf48 2.49809300707 0 86.02583295 2.8562505750000184 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf49 1.575539289 0 87.166664 1.1450040000000143 @@ -2245,6 +2245,6 @@ conf49 1.575539289 0 87.166664 1.1450040000000143 38 gpu batchnorm fp16 1 39 gpu relu fp16 1 40 gpu pool_mean fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/data/autotuner_data/tuner_confs_batch220.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/data/autotuner_data/tuner_confs_batch220.txt index 33c2a45037..99aac99214 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/data/autotuner_data/tuner_confs_batch220.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/data/autotuner_data/tuner_confs_batch220.txt @@ -45,3196 +45,3196 @@ conf1 1 0 89.4 0 ----- +++++ conf1 1.64662434537 0 88.779999 0.930001500000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 36 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 36 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv fp16 1 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv fp16 1 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 26 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 26 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv perf 28 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv perf 28 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv fp16 1 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv fp16 1 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf2 1.70770363451 0 88.819992 0.8700120000000098 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 29 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 29 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 22 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 22 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf3 1.62939140451 0 88.900002 0.7499970000000076 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv fp16 1 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv fp16 1 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv fp16 1 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv fp16 1 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 26 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 26 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv perf 28 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv perf 28 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv fp16 1 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv fp16 1 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf4 1.67183029784 0 88.800003 0.8999955000000028 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 29 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 29 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 29 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 29 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 25 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 25 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 27 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 27 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv fp16 1 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv fp16 1 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf5 1.67183029784 0 88.760002 0.9599970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 25 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 25 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 26 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 26 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv perf 28 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv perf 28 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv fp16 1 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv fp16 1 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf6 1.68608357068 0 88.759995 0.9600075000000032 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 26 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 26 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 22 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 22 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf7 1.66422570442 0 89.0 0.6000000000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 23 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 31 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 31 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 26 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 26 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv fp16 1 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv fp16 1 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf8 1.61251543447 0 89.0 0.6000000000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 28 add fp32 1 relu fp32 1 -7 gpu conv perf 30 add fp32 1 +6 gpu conv perf 28 add fp16 1 relu fp16 1 +7 gpu conv perf 30 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv fp16 1 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv fp16 1 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 35 add fp32 1 relu fp32 1 -24 gpu conv fp16 1 add fp32 1 +23 gpu conv samp 35 add fp16 1 relu fp16 1 +24 gpu conv fp16 1 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 26 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 26 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv fp16 1 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv fp16 1 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf9 1.61607672531 0 88.800003 0.8999955000000028 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 28 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 28 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv fp16 1 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv fp16 1 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv fp16 1 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv fp16 1 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 26 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 26 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv perf 28 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv perf 28 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv fp16 1 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv fp16 1 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf10 1.68608357068 0 88.759995 0.9600075000000032 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 31 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 31 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 26 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 26 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv perf 28 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv perf 28 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv fp16 1 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv fp16 1 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf11 1.70852323878 0 88.759995 0.9600075000000032 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 27 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 27 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf12 1.65033806434 0 88.87999 0.7800149999999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 25 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 25 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv fp16 1 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv fp16 1 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 26 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 26 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv perf 28 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv perf 28 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv fp16 1 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv fp16 1 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf13 1.70058196732 0 89.099998 0.4500030000000095 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 26 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 26 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 22 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 22 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf14 1.64292730268 0 89.220001 0.5799990000000094 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv fp16 1 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv fp16 1 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 31 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 31 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 26 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 26 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv fp16 1 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv fp16 1 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf15 1.7234118184 0 88.880005 0.779992500000013 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 27 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 27 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 22 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 22 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf16 1.7234118184 0 88.82 0.8700000000000188 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 25 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 25 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 24 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 24 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 27 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 27 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 22 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 22 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf17 1.61607672531 0 88.919998 0.7200029999999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 29 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 29 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 24 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 24 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv fp16 1 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv fp16 1 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv fp16 1 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv fp16 1 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 26 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 26 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv perf 28 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv perf 28 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv fp16 1 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv fp16 1 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf18 1.65033806434 0 88.880005 0.779992500000013 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 29 add fp32 1 relu fp32 1 -7 gpu conv perf 29 add fp32 1 +6 gpu conv perf 29 add fp16 1 relu fp16 1 +7 gpu conv perf 29 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 -11 gpu conv samp 36 add fp32 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 +11 gpu conv samp 36 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 23 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 24 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 24 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 35 add fp32 1 relu fp32 1 -24 gpu conv fp16 1 add fp32 1 +23 gpu conv samp 35 add fp16 1 relu fp16 1 +24 gpu conv fp16 1 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 21 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 21 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf19 1.70830164654 0 88.919998 0.7200029999999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 27 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 27 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 22 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 22 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf20 1.70432279259 0 88.900002 0.7499970000000076 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 35 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 35 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 27 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 27 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 22 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 22 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf21 1.64292730268 0 88.759995 0.9600075000000032 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv fp16 1 add fp32 1 relu fp32 1 -20 gpu conv samp 32 add fp32 1 +19 gpu conv fp16 1 add fp16 1 relu fp16 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv fp16 1 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv fp16 1 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 26 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 26 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv perf 28 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv perf 28 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv fp16 1 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv fp16 1 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf22 1.63302771168 0 88.759995 0.9600075000000032 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 29 add fp32 1 relu fp32 1 -7 gpu conv perf 29 add fp32 1 +6 gpu conv perf 29 add fp16 1 relu fp16 1 +7 gpu conv perf 29 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 -11 gpu conv fp16 1 add fp32 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 +11 gpu conv fp16 1 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 23 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 24 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 24 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 35 add fp32 1 relu fp32 1 -24 gpu conv fp16 1 add fp32 1 +23 gpu conv samp 35 add fp16 1 relu fp16 1 +24 gpu conv fp16 1 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 21 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 21 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf23 1.72257787082 0 88.860001 0.8099985000000132 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 30 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 30 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 23 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 23 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 22 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 22 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf24 1.70726093249 0 88.180008 1.8299880000000073 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv fp16 1 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv fp16 1 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 27 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 27 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 21 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 21 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv perf 25 add fp32 1 relu fp32 1 -37 gpu conv perf 23 add fp32 1 +36 gpu conv perf 25 add fp16 1 relu fp16 1 +37 gpu conv perf 23 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf25 1.74663133117 0 88.139999 1.8900015000000039 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 27 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 27 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 33 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 21 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 21 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 21 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 21 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv perf 25 add fp32 1 relu fp32 1 -37 gpu conv perf 24 add fp32 1 +36 gpu conv perf 25 add fp16 1 relu fp16 1 +37 gpu conv perf 24 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf26 1.70225230451 0 88.100006 1.9499910000000185 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 27 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 27 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv fp16 1 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv fp16 1 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv perf 25 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv perf 25 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf27 1.74554340642 0 88.439995 1.4400075000000143 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 26 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 26 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 27 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 27 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 21 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 21 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv perf 25 add fp32 1 relu fp32 1 -37 gpu conv perf 24 add fp32 1 +36 gpu conv perf 25 add fp16 1 relu fp16 1 +37 gpu conv perf 24 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf28 1.74554340642 0 88.220001 1.769998500000014 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 27 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 27 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 27 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 27 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 21 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 21 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv perf 25 add fp32 1 relu fp32 1 -37 gpu conv perf 24 add fp32 1 +36 gpu conv perf 25 add fp16 1 relu fp16 1 +37 gpu conv perf 24 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf29 1.72318634755 0 88.82 0.8700000000000188 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 29 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 29 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 27 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 27 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 27 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 27 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 21 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 21 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 24 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 24 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf30 1.74663133117 0 88.199997 1.8000045000000142 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 27 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 27 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 33 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 27 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 27 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 21 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 21 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv perf 25 add fp32 1 relu fp32 1 -37 gpu conv perf 24 add fp32 1 +36 gpu conv perf 25 add fp16 1 relu fp16 1 +37 gpu conv perf 24 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf31 1.74030798425 0 88.199997 1.8000045000000142 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 27 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 27 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 27 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 27 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv perf 25 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv perf 25 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf32 1.71913793693 0 88.82 0.8700000000000188 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 27 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 27 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 33 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv samp 34 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv samp 34 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 27 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 27 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 21 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 21 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 24 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 24 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf33 1.7112535329 0 88.240005 1.7399925000000138 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 29 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 29 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 29 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 29 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv perf 25 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv perf 25 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf34 1.72512732689 0 88.220001 1.769998500000014 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 27 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 27 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv perf 25 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv perf 25 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf35 1.74138939072 0 88.340004 1.5899940000000186 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 27 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 27 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 27 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 27 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 21 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 21 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv perf 25 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv perf 25 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf36 1.74554340642 0 88.139999 1.8900015000000039 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 29 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 29 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 27 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 27 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 33 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 27 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 27 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv perf 25 add fp32 1 relu fp32 1 -37 gpu conv perf 24 add fp32 1 +36 gpu conv perf 25 add fp16 1 relu fp16 1 +37 gpu conv perf 24 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf37 1.69394144012 0 87.419998 2.9700029999999984 -1 gpu conv perf 24 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv perf 24 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv samp 33 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 22 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 22 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 24 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 24 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 28 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 28 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf38 1.75155546758 0 87.580002 2.7299970000000187 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv samp 35 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv samp 35 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf39 1.76855946151 0 87.459999 2.910001500000014 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 23 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 32 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 33 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 33 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf40 1.67512594279 0 87.760002 2.4599970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv fp16 1 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv fp16 1 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf41 1.75265090172 0 87.440002 2.939996999999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf42 1.76879696322 0 87.419998 2.9700029999999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 23 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 32 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv samp 31 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv samp 31 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 30 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 30 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf43 1.71405948254 0 87.639999 2.640001500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf44 1.7372550994 0 87.419998 2.9700029999999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf45 1.70857587557 0 87.559998 2.760003000000019 -1 gpu conv perf 24 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv perf 24 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv samp 33 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 22 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 22 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 24 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 24 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf46 1.75311745961 0 87.479996 2.8800060000000087 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 23 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 23 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 23 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 32 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 30 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 30 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf47 1.74892735975 0 87.520004 2.8199940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 23 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 35 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 35 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 30 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 30 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf48 1.7372550994 0 87.5 2.8500000000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf49 1.75265090172 0 87.660004 2.6099940000000075 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 22 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 22 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf50 1.7331403923 0 87.459999 2.910001500000014 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv samp 33 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv samp 33 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf51 1.7372550994 0 87.419998 2.9700029999999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 25 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 25 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 25 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 25 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf52 1.75374770691 0 87.419998 2.9700029999999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 33 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 33 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf53 1.73833271397 0 87.5 2.8500000000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 33 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 33 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf54 1.76879696322 0 87.460007 2.909989500000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 23 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 32 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 30 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 30 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf55 1.76744405724 0 87.819992 2.3700120000000098 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 32 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 30 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 30 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf56 1.75398124685 0 87.520004 2.8199940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 27 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 27 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 29 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 29 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 29 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 29 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf57 1.73640770396 0 87.639999 2.640001500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 23 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 23 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 23 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf58 1.7372550994 0 87.5 2.8500000000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 24 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 24 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf59 1.73833271397 0 87.400002 2.9999970000000076 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 33 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 33 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf60 1.75155546758 0 87.659996 2.6100059999999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv samp 33 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf61 1.73833271397 0 87.480003 2.879995500000014 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 25 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 25 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf62 1.73336847573 0 87.520004 2.8199940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv samp 35 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv samp 35 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 33 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 33 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf63 1.7372550994 0 87.419998 2.9700029999999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 23 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 23 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf64 1.67876351791 0 87.519997 2.820004500000003 -1 gpu conv perf 24 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv perf 24 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 24 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 24 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv samp 33 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 23 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 24 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 24 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 28 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 28 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf65 1.7372550994 0 87.599998 2.7000030000000095 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 29 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 29 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 25 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 25 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf66 1.76855946151 0 87.840004 2.3399940000000186 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv samp 33 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 32 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 27 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 27 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 30 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 30 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf67 1.75311745961 0 87.580002 2.7299970000000187 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 23 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 32 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 30 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 30 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf68 1.75265090172 0 87.599998 2.7000030000000095 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 26 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 26 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf69 1.7372550994 0 87.660004 2.6099940000000075 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 25 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 25 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv samp 31 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv samp 31 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf70 1.69313576306 0 87.639999 2.640001500000004 -1 gpu conv perf 24 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv perf 24 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv samp 33 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 23 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 24 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 24 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 28 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 28 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf71 1.69313576306 0 87.479996 2.8800060000000087 -1 gpu conv perf 24 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv perf 24 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv samp 33 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 29 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 29 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 24 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 24 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 28 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 28 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/data/autotuner_data/tuner_pareto_confs_batch220.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/data/autotuner_data/tuner_pareto_confs_batch220.txt index d9a357b823..eaafddc7dd 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/data/autotuner_data/tuner_pareto_confs_batch220.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/data/autotuner_data/tuner_pareto_confs_batch220.txt @@ -45,2386 +45,2386 @@ conf1 1 0 89.4 0 ----- +++++ conf1 1.70770363451 0 88.819992 0.8700120000000098 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 29 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 29 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 22 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 22 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf2 1.68608357068 0 88.759995 0.9600075000000032 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 26 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 26 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 22 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 22 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf3 1.66422570442 0 89.0 0.6000000000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 23 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 31 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 31 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 26 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 26 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv fp16 1 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv fp16 1 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf4 1.68608357068 0 88.759995 0.9600075000000032 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 31 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 31 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 26 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 26 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv perf 28 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv perf 28 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv fp16 1 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv fp16 1 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf5 1.70852323878 0 88.759995 0.9600075000000032 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 27 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 27 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf6 1.70058196732 0 89.099998 0.4500030000000095 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 26 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 26 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 22 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 22 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf7 1.64292730268 0 89.220001 0.5799990000000094 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv fp16 1 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv fp16 1 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 31 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 31 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 26 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 26 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv fp16 1 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv fp16 1 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf8 1.7234118184 0 88.880005 0.779992500000013 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 27 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 27 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 22 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 22 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf9 1.7234118184 0 88.82 0.8700000000000188 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 25 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 25 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 24 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 24 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 27 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 27 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 22 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 22 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf10 1.70830164654 0 88.919998 0.7200029999999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 27 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 27 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 22 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 22 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf11 1.70432279259 0 88.900002 0.7499970000000076 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 35 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 35 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 27 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 27 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 22 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 22 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf12 1.72257787082 0 88.860001 0.8099985000000132 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 30 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 30 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv perf 30 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv perf 30 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 23 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 23 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 22 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 22 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf13 1.70726093249 0 88.180008 1.8299880000000073 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv fp16 1 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv fp16 1 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 27 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 27 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 21 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 21 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv perf 25 add fp32 1 relu fp32 1 -37 gpu conv perf 23 add fp32 1 +36 gpu conv perf 25 add fp16 1 relu fp16 1 +37 gpu conv perf 23 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf14 1.74663133117 0 88.139999 1.8900015000000039 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 27 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 27 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 33 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 21 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 21 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 21 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 21 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv perf 25 add fp32 1 relu fp32 1 -37 gpu conv perf 24 add fp32 1 +36 gpu conv perf 25 add fp16 1 relu fp16 1 +37 gpu conv perf 24 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf15 1.70225230451 0 88.100006 1.9499910000000185 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 27 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 27 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv fp16 1 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv fp16 1 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv perf 25 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv perf 25 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf16 1.74554340642 0 88.439995 1.4400075000000143 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 26 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 26 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 27 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 27 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 21 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 21 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv perf 25 add fp32 1 relu fp32 1 -37 gpu conv perf 24 add fp32 1 +36 gpu conv perf 25 add fp16 1 relu fp16 1 +37 gpu conv perf 24 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf17 1.74554340642 0 88.220001 1.769998500000014 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 27 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 27 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 27 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 27 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 21 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 21 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv perf 25 add fp32 1 relu fp32 1 -37 gpu conv perf 24 add fp32 1 +36 gpu conv perf 25 add fp16 1 relu fp16 1 +37 gpu conv perf 24 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf18 1.72318634755 0 88.82 0.8700000000000188 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 29 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 29 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 27 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 27 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 27 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 27 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 21 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 21 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 24 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 24 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf19 1.74663133117 0 88.199997 1.8000045000000142 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 27 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 27 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 33 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 27 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 27 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 21 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 21 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv perf 25 add fp32 1 relu fp32 1 -37 gpu conv perf 24 add fp32 1 +36 gpu conv perf 25 add fp16 1 relu fp16 1 +37 gpu conv perf 24 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf20 1.74030798425 0 88.199997 1.8000045000000142 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 27 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 27 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 27 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 27 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv perf 25 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv perf 25 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf21 1.71913793693 0 88.82 0.8700000000000188 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 27 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 27 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 33 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv samp 34 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv samp 34 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 27 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 27 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 21 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 21 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 24 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 24 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf22 1.7112535329 0 88.240005 1.7399925000000138 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 29 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 29 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 29 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 29 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv perf 25 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv perf 25 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf23 1.72512732689 0 88.220001 1.769998500000014 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 27 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 27 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv perf 25 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv perf 25 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf24 1.74138939072 0 88.340004 1.5899940000000186 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 27 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 27 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 27 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 27 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 21 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 21 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv perf 25 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv perf 25 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf25 1.74554340642 0 88.139999 1.8900015000000039 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 29 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 29 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 27 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 27 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 33 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 27 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv perf 27 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv perf 25 add fp32 1 relu fp32 1 -37 gpu conv perf 24 add fp32 1 +36 gpu conv perf 25 add fp16 1 relu fp16 1 +37 gpu conv perf 24 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf26 1.75155546758 0 87.580002 2.7299970000000187 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv samp 35 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv samp 35 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf27 1.76855946151 0 87.459999 2.910001500000014 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 23 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 32 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 33 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 33 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf28 1.75265090172 0 87.440002 2.939996999999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf29 1.76879696322 0 87.419998 2.9700029999999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 23 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 32 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv samp 31 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv samp 31 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 30 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 30 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf30 1.7372550994 0 87.419998 2.9700029999999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf31 1.75311745961 0 87.479996 2.8800060000000087 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 23 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 23 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 23 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 32 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 30 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 30 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf32 1.74892735975 0 87.520004 2.8199940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 23 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 35 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 35 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 30 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 30 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf33 1.7372550994 0 87.5 2.8500000000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf34 1.75265090172 0 87.660004 2.6099940000000075 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 22 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 22 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf35 1.7331403923 0 87.459999 2.910001500000014 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv samp 33 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv samp 33 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf36 1.7372550994 0 87.419998 2.9700029999999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 25 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 25 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 25 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 25 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf37 1.75374770691 0 87.419998 2.9700029999999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 33 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 33 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf38 1.73833271397 0 87.5 2.8500000000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 33 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 33 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf39 1.76879696322 0 87.460007 2.909989500000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 23 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 32 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 30 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 30 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf40 1.76744405724 0 87.819992 2.3700120000000098 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 32 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 30 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 30 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf41 1.75398124685 0 87.520004 2.8199940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 27 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 27 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 29 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 29 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 29 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 29 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf42 1.73640770396 0 87.639999 2.640001500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 23 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 23 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 23 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf43 1.7372550994 0 87.5 2.8500000000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 24 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 24 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf44 1.73833271397 0 87.400002 2.9999970000000076 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 33 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 33 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf45 1.75155546758 0 87.659996 2.6100059999999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv samp 33 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf46 1.73833271397 0 87.480003 2.879995500000014 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 25 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 25 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf47 1.73336847573 0 87.520004 2.8199940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv samp 35 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv samp 35 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 28 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 33 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 33 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf48 1.7372550994 0 87.419998 2.9700029999999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 23 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 23 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf49 1.7372550994 0 87.599998 2.7000030000000095 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 29 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 29 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 25 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 25 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf50 1.76855946151 0 87.840004 2.3399940000000186 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv samp 33 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 32 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 27 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 27 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 30 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 30 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf51 1.75311745961 0 87.580002 2.7299970000000187 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 23 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 32 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 30 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 30 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf52 1.75265090172 0 87.599998 2.7000030000000095 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 26 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 26 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf53 1.7372550994 0 87.660004 2.6099940000000075 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 25 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 25 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv perf 29 add fp32 1 -16 gpu conv perf 21 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv perf 29 add fp16 1 +16 gpu conv perf 21 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 -24 gpu conv samp 31 add fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 +24 gpu conv samp 31 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/data/autotuner_data/tuner_promise_confs_batch220_multi.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/data/autotuner_data/tuner_promise_confs_batch220_multi.txt index afc08a8a47..793a33e54e 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/data/autotuner_data/tuner_promise_confs_batch220_multi.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/data/autotuner_data/tuner_promise_confs_batch220_multi.txt @@ -45,65 +45,65 @@ conf1 1 0 89.4 0 ----- +++++ conf1 1.99031144732 0 89.0290005 0.5564992500000159 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf2 2.57757857769 0 87.713000325 2.5304995125000147 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 6 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 22 add fp32 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 16 promise swing_level 7 17 gpu add fp16 1 @@ -113,16 +113,16 @@ conf2 2.57757857769 0 87.713000325 2.5304995125000147 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 7 -24 gpu conv perf 27 add fp32 1 +24 gpu conv perf 27 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 6 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -130,13 +130,13 @@ conf2 2.57757857769 0 87.713000325 2.5304995125000147 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf3 3.02709182826 0 87.506999275 2.839501087500011 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 @@ -145,10 +145,10 @@ conf3 3.02709182826 0 87.506999275 2.839501087500011 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 22 add fp32 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 16 promise swing_level 3 17 gpu add fp16 1 @@ -161,12 +161,12 @@ conf3 3.02709182826 0 87.506999275 2.839501087500011 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 6 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 7 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -175,13 +175,13 @@ conf3 3.02709182826 0 87.506999275 2.839501087500011 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf4 2.82393934796 0 87.531501575 2.802747637500019 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 @@ -190,10 +190,10 @@ conf4 2.82393934796 0 87.531501575 2.802747637500019 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 22 add fp32 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 16 promise swing_level 3 17 gpu add fp16 1 @@ -206,13 +206,13 @@ conf4 2.82393934796 0 87.531501575 2.802747637500019 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 6 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -220,25 +220,25 @@ conf4 2.82393934796 0 87.531501575 2.802747637500019 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf5 2.9277889247 0 87.56699995 2.7495000750000145 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 6 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 22 add fp32 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 16 promise swing_level 6 17 gpu add fp16 1 @@ -251,12 +251,12 @@ conf5 2.9277889247 0 87.56699995 2.7495000750000145 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 6 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 7 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -265,25 +265,25 @@ conf5 2.9277889247 0 87.56699995 2.7495000750000145 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf6 2.73705987921 0 87.842499925 2.336250112500011 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 6 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 22 add fp32 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 16 promise swing_level 3 17 gpu add fp16 1 @@ -296,13 +296,13 @@ conf6 2.73705987921 0 87.842499925 2.336250112500011 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 6 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -310,25 +310,25 @@ conf6 2.73705987921 0 87.842499925 2.336250112500011 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf7 2.61712759428 0 87.57399955 2.739000675000014 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 6 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 16 promise swing_level 7 17 gpu add fp16 1 @@ -341,13 +341,13 @@ conf7 2.61712759428 0 87.57399955 2.739000675000014 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 6 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -355,25 +355,25 @@ conf7 2.61712759428 0 87.57399955 2.739000675000014 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf8 2.92641312608 0 87.6109998 2.6835003000000057 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 6 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 22 add fp32 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 16 promise swing_level 7 17 gpu add fp16 1 @@ -386,12 +386,12 @@ conf8 2.92641312608 0 87.6109998 2.6835003000000057 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 6 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 7 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -400,25 +400,25 @@ conf8 2.92641312608 0 87.6109998 2.6835003000000057 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf9 2.9302379857 0 87.59900045 2.7014993250000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 6 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 22 add fp32 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 16 promise swing_level 3 17 gpu add fp16 1 @@ -431,12 +431,12 @@ conf9 2.9302379857 0 87.59900045 2.7014993250000003 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 6 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 7 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -445,25 +445,25 @@ conf9 2.9302379857 0 87.59900045 2.7014993250000003 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf10 2.9302379857 0 87.531999525 2.8020007125000035 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 6 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 26 add fp32 1 +11 gpu conv perf 26 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 16 promise swing_level 3 17 gpu add fp16 1 @@ -476,12 +476,12 @@ conf10 2.9302379857 0 87.531999525 2.8020007125000035 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 6 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 5 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -490,25 +490,25 @@ conf10 2.9302379857 0 87.531999525 2.8020007125000035 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf11 2.55711655143 0 87.684999175 2.5725012375000063 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 5 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 6 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 -11 gpu conv samp 33 add fp32 1 +11 gpu conv samp 33 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 16 promise swing_level 7 17 gpu add fp16 1 @@ -518,16 +518,16 @@ conf11 2.55711655143 0 87.684999175 2.5725012375000063 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 7 -24 gpu conv samp 31 add fp32 1 +24 gpu conv samp 31 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -535,13 +535,13 @@ conf11 2.55711655143 0 87.684999175 2.5725012375000063 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf12 3.07188703492 0 87.516000375 2.825999437500002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 @@ -550,10 +550,10 @@ conf12 3.07188703492 0 87.516000375 2.825999437500002 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 22 add fp32 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 16 promise swing_level 3 17 gpu add fp16 1 @@ -566,12 +566,12 @@ conf12 3.07188703492 0 87.516000375 2.825999437500002 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 6 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 5 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -580,13 +580,13 @@ conf12 3.07188703492 0 87.516000375 2.825999437500002 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf13 2.97102613385 0 87.504499975 2.843250037500006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 @@ -594,11 +594,11 @@ conf13 2.97102613385 0 87.504499975 2.843250037500006 7 promise swing_level 5 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 22 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 5 16 promise swing_level 3 17 gpu add fp16 1 @@ -611,12 +611,12 @@ conf13 2.97102613385 0 87.504499975 2.843250037500006 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 5 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -625,44 +625,44 @@ conf13 2.97102613385 0 87.504499975 2.843250037500006 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf14 2.64018111015 0 87.69499965 2.557500525000009 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 6 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 22 add fp32 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 3 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 3 -20 gpu conv perf 29 add fp32 1 +20 gpu conv perf 29 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 6 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 6 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -670,25 +670,25 @@ conf14 2.64018111015 0 87.69499965 2.557500525000009 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf15 2.90090613007 0 87.567499975 2.7487500375000025 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 6 promise swing_level 6 -7 gpu conv perf 28 add fp32 1 +7 gpu conv perf 28 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 22 add fp32 1 +11 gpu conv perf 22 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 16 promise swing_level 3 17 gpu add fp16 1 @@ -701,12 +701,12 @@ conf15 2.90090613007 0 87.567499975 2.7487500375000025 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 6 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 5 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -715,13 +715,13 @@ conf15 2.90090613007 0 87.567499975 2.7487500375000025 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf16 2.71547033787 0 87.609499925 2.685750112500017 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 @@ -729,29 +729,29 @@ conf16 2.71547033787 0 87.609499925 2.685750112500017 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 26 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 26 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv samp 32 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 6 -24 gpu conv samp 31 add fp32 1 +24 gpu conv samp 31 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 5 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 5 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -760,13 +760,13 @@ conf16 2.71547033787 0 87.609499925 2.685750112500017 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf17 2.98813286738 0 87.5435004 2.7847494000000097 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 @@ -774,29 +774,29 @@ conf17 2.98813286738 0 87.5435004 2.7847494000000097 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 26 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 26 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 16 promise swing_level 5 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv samp 32 add fp32 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 6 24 promise swing_level 5 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 5 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 5 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -805,43 +805,43 @@ conf17 2.98813286738 0 87.5435004 2.7847494000000097 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf18 2.56902434302 0 87.606999175 2.6895012375000107 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 26 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 26 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 16 promise swing_level 5 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv samp 32 add fp32 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 6 24 promise swing_level 5 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 3 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -850,13 +850,13 @@ conf18 2.56902434302 0 87.606999175 2.6895012375000107 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf19 2.79012299314 0 87.67950025 2.580749625000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 @@ -864,29 +864,29 @@ conf19 2.79012299314 0 87.67950025 2.580749625000003 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 -11 gpu conv perf 26 add fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 +11 gpu conv perf 26 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 -16 gpu conv fp16 1 add fp32 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv samp 32 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 7 24 promise swing_level 5 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 5 -29 gpu conv perf 29 add fp32 1 +29 gpu conv perf 29 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 3 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -895,13 +895,13 @@ conf19 2.79012299314 0 87.67950025 2.580749625000003 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf20 2.73489457491 0 87.6280004 2.6579994000000013 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 @@ -909,29 +909,29 @@ conf20 2.73489457491 0 87.6280004 2.6579994000000013 7 promise swing_level 7 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 -11 gpu conv perf 23 add fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 +11 gpu conv perf 23 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 16 promise swing_level 5 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 25 add fp32 1 relu fp32 1 -20 gpu conv samp 32 add fp32 1 +19 gpu conv perf 25 add fp16 1 relu fp16 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 6 24 promise swing_level 5 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 3 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -940,44 +940,44 @@ conf20 2.73489457491 0 87.6280004 2.6579994000000013 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf21 2.44087693138 0 87.82049955 2.3692506750000177 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 6 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 25 add fp32 1 relu fp32 1 -20 gpu conv samp 32 add fp32 1 +19 gpu conv perf 25 add fp16 1 relu fp16 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 6 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 7 @@ -985,62 +985,62 @@ conf21 2.44087693138 0 87.82049955 2.3692506750000177 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf22 2.3321681524 0 87.9240005 2.2139992500000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 5 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 3 -16 gpu conv perf 28 add fp32 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv samp 32 add fp32 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 7 -24 gpu conv perf 30 add fp32 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 -29 gpu conv samp 33 add fp32 1 +29 gpu conv samp 33 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf23 2.53070583756 0 87.55950105 2.760748425000017 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 5 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -1048,26 +1048,26 @@ conf23 2.53070583756 0 87.55950105 2.760748425000017 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 3 -16 gpu conv perf 23 add fp32 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv samp 35 add fp32 1 +20 gpu conv samp 35 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 24 promise swing_level 5 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 33 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 33 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -1075,43 +1075,43 @@ conf23 2.53070583756 0 87.55950105 2.760748425000017 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf24 2.68292997945 0 87.5750001 2.737499850000013 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 3 -16 gpu conv perf 23 add fp32 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv samp 32 add fp32 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 24 promise swing_level 5 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 6 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 7 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -1120,44 +1120,44 @@ conf24 2.68292997945 0 87.5750001 2.737499850000013 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf25 2.66170389313 0 87.728001375 2.5079979375000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 5 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 3 -16 gpu conv perf 23 add fp32 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv samp 32 add fp32 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 24 promise swing_level 5 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 6 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -1165,43 +1165,43 @@ conf25 2.66170389313 0 87.728001375 2.5079979375000008 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf26 2.86760965959 0 87.4914997 2.86275045 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 5 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 3 -16 gpu conv perf 23 add fp32 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv samp 32 add fp32 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 24 promise swing_level 5 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 6 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 7 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -1210,44 +1210,44 @@ conf26 2.86760965959 0 87.4914997 2.86275045 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf27 2.67414927982 0 87.5269998 2.8095003000000105 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 5 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 3 -16 gpu conv perf 23 add fp32 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv samp 32 add fp32 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 24 promise swing_level 5 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 -29 gpu conv samp 36 add fp32 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -1255,44 +1255,44 @@ conf27 2.67414927982 0 87.5269998 2.8095003000000105 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf28 2.68311033561 0 87.65149905 2.6227514250000112 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 5 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 3 16 promise swing_level 5 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv samp 32 add fp32 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 24 promise swing_level 5 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 -29 gpu conv perf 30 add fp32 1 +29 gpu conv perf 30 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -1300,44 +1300,44 @@ conf28 2.68311033561 0 87.65149905 2.6227514250000112 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf29 2.53325144094 0 87.598500275 2.702249587499999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 5 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 3 -16 gpu conv perf 23 add fp32 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv samp 32 add fp32 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 30 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 30 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -1345,43 +1345,43 @@ conf29 2.53325144094 0 87.598500275 2.702249587499999 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf30 2.82534166516 0 87.54249935 2.7862509750000086 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 5 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 3 16 promise swing_level 7 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv samp 32 add fp32 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 24 promise swing_level 5 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 -29 gpu conv perf 30 add fp32 1 +29 gpu conv perf 30 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 7 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -1390,44 +1390,44 @@ conf30 2.82534166516 0 87.54249935 2.7862509750000086 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf31 2.65417336849 0 87.6089993 2.686501050000018 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 5 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 3 -16 gpu conv perf 23 add fp32 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv samp 32 add fp32 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 24 promise swing_level 5 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 -29 gpu conv perf 30 add fp32 1 +29 gpu conv perf 30 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -1435,89 +1435,89 @@ conf31 2.65417336849 0 87.6089993 2.686501050000018 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf32 2.4911131732 0 87.802000025 2.3969999625000113 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 5 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 3 16 promise swing_level 3 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv samp 32 add fp32 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 24 promise swing_level 5 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 -29 gpu conv perf 30 add fp32 1 +29 gpu conv perf 30 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 37 promise swing_level 5 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf33 2.40302026058 0 87.854000425 2.3189993625000156 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 5 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 -16 gpu conv perf 23 add fp32 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv samp 32 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 24 promise swing_level 5 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 5 -29 gpu conv perf 22 add fp32 1 +29 gpu conv perf 22 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 7 @@ -1525,25 +1525,25 @@ conf33 2.40302026058 0 87.854000425 2.3189993625000156 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf34 2.84804665449 0 87.740999925 2.488500112500013 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 5 17 gpu add fp16 1 @@ -1552,17 +1552,17 @@ conf34 2.84804665449 0 87.740999925 2.488500112500013 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 7 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 5 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -1570,43 +1570,43 @@ conf34 2.84804665449 0 87.740999925 2.488500112500013 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf35 2.61969640461 0 87.527499725 2.808750412500011 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 -11 gpu conv perf 23 add fp32 1 +11 gpu conv perf 23 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 5 16 promise swing_level 5 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 6 -20 gpu conv samp 32 add fp32 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 7 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -1615,17 +1615,17 @@ conf35 2.61969640461 0 87.527499725 2.808750412500011 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf36 2.83250210857 0 87.593499575 2.709750637500015 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -1633,7 +1633,7 @@ conf36 2.83250210857 0 87.593499575 2.709750637500015 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 5 17 gpu add fp16 1 @@ -1642,17 +1642,17 @@ conf36 2.83250210857 0 87.593499575 2.709750637500015 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 5 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -1660,43 +1660,43 @@ conf36 2.83250210857 0 87.593499575 2.709750637500015 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf37 2.629602217 0 87.5815004 2.7277494000000146 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 5 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 -11 gpu conv perf 23 add fp32 1 +11 gpu conv perf 23 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 5 16 promise swing_level 6 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 25 add fp32 1 relu fp32 1 -20 gpu conv samp 32 add fp32 1 +19 gpu conv perf 25 add fp16 1 relu fp16 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 6 24 promise swing_level 5 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 7 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -1705,17 +1705,17 @@ conf37 2.629602217 0 87.5815004 2.7277494000000146 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf38 2.94501882677 0 87.56749975 2.7487503750000144 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -1723,7 +1723,7 @@ conf38 2.94501882677 0 87.56749975 2.7487503750000144 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 5 17 gpu add fp16 1 @@ -1732,17 +1732,17 @@ conf38 2.94501882677 0 87.56749975 2.7487503750000144 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -1750,340 +1750,340 @@ conf38 2.94501882677 0 87.56749975 2.7487503750000144 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf39 1.99031144732 0 88.99750025 0.6037496250000061 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf40 1.99031144732 0 89.003000025 0.5954999624999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf41 2.21547290814 0 88.256999225 1.7145011625000066 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 30 add fp32 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 6 -37 gpu conv perf 25 add fp32 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf42 1.98491256317 0 88.7185015 1.0222477500000053 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 29 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 29 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf43 2.22497133236 0 88.25049995 1.7242500750000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 33 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 30 add fp32 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 -37 gpu conv perf 25 add fp32 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf44 2.22497133236 0 88.1564994 1.8652509000000066 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 30 add fp32 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 -37 gpu conv perf 25 add fp32 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf45 2.12621729247 0 88.2014995 1.797750750000013 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 3 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf46 2.67828189181 0 87.85199965 2.322000525000014 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 16 promise swing_level 5 17 gpu add fp16 1 @@ -2092,17 +2092,17 @@ conf46 2.67828189181 0 87.85199965 2.322000525000014 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 -29 gpu conv fp16 1 add fp32 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 7 @@ -2110,25 +2110,25 @@ conf46 2.67828189181 0 87.85199965 2.322000525000014 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf47 2.62379296466 0 87.79699975 2.404500375000012 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 3 11 promise swing_level 6 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 3 17 gpu add fp16 1 @@ -2137,43 +2137,43 @@ conf47 2.62379296466 0 87.79699975 2.404500375000012 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv samp 36 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv samp 36 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf48 3.12724843824 0 87.771999675 2.442000487500003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 6 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 5 17 gpu add fp16 1 @@ -2186,13 +2186,13 @@ conf48 3.12724843824 0 87.771999675 2.442000487500003 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -2200,25 +2200,25 @@ conf48 3.12724843824 0 87.771999675 2.442000487500003 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf49 2.73513594156 0 87.807499425 2.388750862500004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 +11 gpu conv perf 24 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 5 16 promise swing_level 5 17 gpu add fp16 1 @@ -2231,39 +2231,39 @@ conf49 2.73513594156 0 87.807499425 2.388750862500004 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 37 promise swing_level 6 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf50 2.80761440799 0 87.731999575 2.5020006375000037 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 11 promise swing_level 6 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 5 17 gpu add fp16 1 @@ -2272,17 +2272,17 @@ conf50 2.80761440799 0 87.731999575 2.5020006375000037 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 -24 gpu conv samp 31 add fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 +24 gpu conv samp 31 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -2290,17 +2290,17 @@ conf50 2.80761440799 0 87.731999575 2.5020006375000037 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf51 2.93837984583 0 87.805499075 2.3917513875000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -2308,7 +2308,7 @@ conf51 2.93837984583 0 87.805499075 2.3917513875000083 11 promise swing_level 6 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 5 17 gpu add fp16 1 @@ -2317,17 +2317,17 @@ conf51 2.93837984583 0 87.805499075 2.3917513875000083 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 5 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -2335,296 +2335,296 @@ conf51 2.93837984583 0 87.805499075 2.3917513875000083 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf52 1.99031144732 0 89.004999975 0.5925000375000025 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf53 2.17788758018 0 88.935499925 0.6967501125000055 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 -16 gpu conv samp 36 add fp32 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 20 promise swing_level 6 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf54 2.17873417706 0 88.9274996 0.7087506000000019 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 -16 gpu conv samp 36 add fp32 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 20 promise swing_level 6 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf55 1.95777525897 0 88.3009997 1.6485004500000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 -11 gpu conv fp16 1 add fp32 1 +11 gpu conv fp16 1 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 33 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 6 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 27 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 27 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf56 1.99031144732 0 89.01400035 0.5789994750000034 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf57 1.97358985685 0 88.7140007 1.028998950000009 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv samp 34 add fp32 1 +20 gpu conv samp 34 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf58 2.56064367934 0 88.231499225 1.7527511625000187 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 6 promise swing_level 6 -7 gpu conv perf 25 add fp32 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 6 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 16 promise swing_level 6 17 gpu add fp16 1 18 gpu relu fp16 1 @@ -2632,44 +2632,44 @@ conf58 2.56064367934 0 88.231499225 1.7527511625000187 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 24 promise swing_level 7 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 6 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 7 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 6 -37 gpu conv perf 25 add fp32 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf59 2.28783886296 0 88.30649835 1.6402524750000111 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 6 promise swing_level 6 -7 gpu conv perf 25 add fp32 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 26 add fp32 1 +11 gpu conv perf 26 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 16 promise swing_level 7 17 gpu add fp16 1 18 gpu relu fp16 1 @@ -2677,179 +2677,179 @@ conf59 2.28783886296 0 88.30649835 1.6402524750000111 20 promise swing_level 7 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 24 promise swing_level 7 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 6 -37 gpu conv perf 25 add fp32 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf60 2.20253593211 0 88.222500025 1.7662499625000052 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 5 20 promise swing_level 7 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 -37 gpu conv perf 25 add fp32 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf61 2.11179715847 0 88.37400155 1.5389976750000045 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 -37 gpu conv perf 25 add fp32 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf62 1.88889296109 0 89.401999025 0.39800097500001075 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf63 2.74881833486 0 88.137499875 1.8937501875000038 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 6 promise swing_level 6 -7 gpu conv perf 25 add fp32 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 6 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 16 promise swing_level 6 17 gpu add fp16 1 18 gpu relu fp16 1 @@ -2857,16 +2857,16 @@ conf63 2.74881833486 0 88.137499875 1.8937501875000038 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 24 promise swing_level 7 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 5 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 7 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -2875,26 +2875,26 @@ conf63 2.74881833486 0 88.137499875 1.8937501875000038 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf64 2.72715091649 0 88.143500325 1.884749512500001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 6 promise swing_level 6 -7 gpu conv perf 25 add fp32 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 6 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 16 promise swing_level 6 17 gpu add fp16 1 18 gpu relu fp16 1 @@ -2902,16 +2902,16 @@ conf64 2.72715091649 0 88.143500325 1.884749512500001 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 24 promise swing_level 7 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 5 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 7 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -2920,26 +2920,26 @@ conf64 2.72715091649 0 88.143500325 1.884749512500001 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf65 2.51819138242 0 88.171500225 1.8427496625000046 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 6 promise swing_level 6 -7 gpu conv perf 25 add fp32 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 6 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 16 promise swing_level 6 17 gpu add fp16 1 18 gpu relu fp16 1 @@ -2947,44 +2947,44 @@ conf65 2.51819138242 0 88.171500225 1.8427496625000046 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 5 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 7 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 6 -37 gpu conv perf 25 add fp32 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf66 2.45563487006 0 88.107000025 1.9394999625000011 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 6 promise swing_level 6 -7 gpu conv perf 25 add fp32 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 6 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 16 promise swing_level 7 17 gpu add fp16 1 18 gpu relu fp16 1 @@ -2992,268 +2992,268 @@ conf66 2.45563487006 0 88.107000025 1.9394999625000011 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 24 promise swing_level 7 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 5 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 6 -37 gpu conv perf 25 add fp32 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf67 1.8929758669 0 89.0704996 0.4942506000000009 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 16 promise swing_level 6 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf68 2.12476642317 0 88.189499675 1.8157504875000186 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 -37 gpu conv perf 25 add fp32 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf69 2.10865127669 0 88.32950015 1.6057497750000067 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 3 4 gpu add fp16 1 5 gpu relu fp16 1 6 promise swing_level 6 -7 gpu conv perf 25 add fp32 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 16 promise swing_level 7 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf70 2.03634484592 0 88.349000325 1.5764995124999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 -37 gpu conv perf 25 add fp32 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf71 1.96458208058 0 88.502999375 1.345500937500006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 5 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf72 2.92748308001 0 87.6659998 2.601000300000017 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 11 promise swing_level 5 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 16 promise swing_level 6 17 gpu add fp16 1 @@ -3266,13 +3266,13 @@ conf72 2.92748308001 0 87.6659998 2.601000300000017 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -3280,17 +3280,17 @@ conf72 2.92748308001 0 87.6659998 2.601000300000017 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf73 2.97030253204 0 87.5164995 2.8252507500000164 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -3298,7 +3298,7 @@ conf73 2.97030253204 0 87.5164995 2.8252507500000164 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 5 17 gpu add fp16 1 @@ -3307,17 +3307,17 @@ conf73 2.97030253204 0 87.5164995 2.8252507500000164 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -3325,17 +3325,17 @@ conf73 2.97030253204 0 87.5164995 2.8252507500000164 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf74 2.91106181836 0 87.90149915 2.247751274999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -3343,7 +3343,7 @@ conf74 2.91106181836 0 87.90149915 2.247751274999999 11 promise swing_level 5 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 6 17 gpu add fp16 1 @@ -3356,13 +3356,13 @@ conf74 2.91106181836 0 87.90149915 2.247751274999999 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -3370,17 +3370,17 @@ conf74 2.91106181836 0 87.90149915 2.247751274999999 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf75 2.5076749918 0 87.540000475 2.7899992875000095 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -3388,52 +3388,52 @@ conf75 2.5076749918 0 87.540000475 2.7899992875000095 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 -16 gpu conv perf 28 add fp32 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 37 promise swing_level 7 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf76 2.78687016859 0 87.6910004 2.563499400000019 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 5 17 gpu add fp16 1 @@ -3442,17 +3442,17 @@ conf76 2.78687016859 0 87.6910004 2.563499400000019 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -3460,62 +3460,62 @@ conf76 2.78687016859 0 87.6910004 2.563499400000019 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf77 1.99031144732 0 89.01750055 0.5737491750000174 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf78 1.99484508858 0 88.9860003 0.6209995500000076 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 7 promise swing_level 7 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -3523,44 +3523,44 @@ conf78 1.99484508858 0 88.9860003 0.6209995500000076 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 16 promise swing_level 6 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf79 2.02204888844 0 89.04649985 0.5302502250000032 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 7 promise swing_level 5 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -3568,44 +3568,44 @@ conf79 2.02204888844 0 89.04649985 0.5302502250000032 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 16 promise swing_level 6 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 5 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf80 1.96432132027 0 89.114499425 0.6855005750000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 7 promise swing_level 7 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -3613,44 +3613,44 @@ conf80 1.96432132027 0 89.114499425 0.6855005750000004 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 16 promise swing_level 6 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf81 2.03526556264 0 89.047999975 0.5280000375000142 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 7 promise swing_level 5 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -3658,269 +3658,269 @@ conf81 2.03526556264 0 89.047999975 0.5280000375000142 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 16 promise swing_level 6 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 7 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf82 2.06667976408 0 88.9610005 0.6584992500000126 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 5 16 promise swing_level 6 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 5 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf83 1.98988747474 0 88.80850145 0.8872478250000171 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 6 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf84 1.99031144732 0 88.991502125 0.6127468125000135 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf85 2.09482739283 0 88.6145 1.1782499999999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 16 promise swing_level 7 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 30 add fp32 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf86 2.11179715847 0 88.37550065 1.5367490249999989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 29 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 29 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 5 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf87 2.26209467565 0 88.347499625 1.5787505625000122 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 7 promise swing_level 5 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -3928,44 +3928,44 @@ conf87 2.26209467565 0 88.347499625 1.5787505625000122 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 3 -16 gpu conv samp 36 add fp32 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv perf 21 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv perf 21 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf88 2.7393340084 0 87.67699915 2.584501275000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -3973,7 +3973,7 @@ conf88 2.7393340084 0 87.67699915 2.584501275000008 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 5 16 promise swing_level 5 17 gpu add fp16 1 @@ -3982,17 +3982,17 @@ conf88 2.7393340084 0 87.67699915 2.584501275000008 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -4000,17 +4000,17 @@ conf88 2.7393340084 0 87.67699915 2.584501275000008 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf89 2.94501882677 0 87.56549995 2.751750075000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -4018,7 +4018,7 @@ conf89 2.94501882677 0 87.56549995 2.751750075000004 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 5 17 gpu add fp16 1 @@ -4027,17 +4027,17 @@ conf89 2.94501882677 0 87.56549995 2.751750075000004 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -4045,17 +4045,17 @@ conf89 2.94501882677 0 87.56549995 2.751750075000004 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf90 2.5282332844 0 87.74799995 2.4780000750000184 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -4063,7 +4063,7 @@ conf90 2.5282332844 0 87.74799995 2.4780000750000184 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 5 16 promise swing_level 5 17 gpu add fp16 1 @@ -4072,62 +4072,62 @@ conf90 2.5282332844 0 87.74799995 2.4780000750000184 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 -29 gpu conv fp16 1 add fp32 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 37 promise swing_level 5 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf91 2.57862062727 0 87.52849945 2.807250825000011 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 3 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 3 16 promise swing_level 3 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 25 add fp32 1 +20 gpu conv perf 25 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 -29 gpu conv fp16 1 add fp32 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -4135,17 +4135,17 @@ conf91 2.57862062727 0 87.52849945 2.807250825000011 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf92 2.93115743965 0 87.497000275 2.854499587500001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -4153,26 +4153,26 @@ conf92 2.93115743965 0 87.497000275 2.854499587500001 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 -16 gpu conv fp16 1 add fp32 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -4180,17 +4180,17 @@ conf92 2.93115743965 0 87.497000275 2.854499587500001 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf93 2.78797917223 0 87.645499425 2.631750862500013 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -4198,7 +4198,7 @@ conf93 2.78797917223 0 87.645499425 2.631750862500013 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 5 16 promise swing_level 5 17 gpu add fp16 1 @@ -4207,17 +4207,17 @@ conf93 2.78797917223 0 87.645499425 2.631750862500013 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 -29 gpu conv fp16 1 add fp32 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -4225,25 +4225,25 @@ conf93 2.78797917223 0 87.645499425 2.631750862500013 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf94 2.78750714825 0 87.816499475 2.3752507875000077 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 5 17 gpu add fp16 1 @@ -4252,17 +4252,17 @@ conf94 2.78750714825 0 87.816499475 2.3752507875000077 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -4270,17 +4270,17 @@ conf94 2.78750714825 0 87.816499475 2.3752507875000077 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf95 2.94625729666 0 87.554999875 2.767500187500019 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -4288,7 +4288,7 @@ conf95 2.94625729666 0 87.554999875 2.767500187500019 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 5 16 promise swing_level 5 17 gpu add fp16 1 @@ -4297,17 +4297,17 @@ conf95 2.94625729666 0 87.554999875 2.767500187500019 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 -29 gpu conv fp16 1 add fp32 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -4315,557 +4315,557 @@ conf95 2.94625729666 0 87.554999875 2.767500187500019 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf96 1.99031144732 0 89.016499725 0.5752504125000044 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf97 1.95992520608 0 89.170500175 0.6294998250000049 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv fp16 1 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv fp16 1 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf98 1.99031144732 0 89.026500525 0.560249212500004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf99 2.12232021484 0 88.17899975 1.8315003750000045 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 23 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 23 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf100 2.24934835888 0 88.1444998 1.8832503000000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 5 -16 gpu conv samp 36 add fp32 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 37 promise swing_level 3 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf101 2.20944681231 0 88.3700004 1.544999400000016 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 5 -16 gpu conv samp 36 add fp32 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 37 promise swing_level 7 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf102 2.12392870276 0 88.23650015 1.745249775000012 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf103 2.21132934299 0 88.22849925 1.7572511250000105 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 5 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 5 -16 gpu conv samp 36 add fp32 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 23 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 5 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 37 promise swing_level 3 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf104 2.21416009267 0 88.36400015 1.5539997750000154 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 20 promise swing_level 6 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf105 2.12232021484 0 88.2029984 1.7955024000000108 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf106 2.11179715847 0 88.47150095 1.3927485749999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 5 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf107 2.00882964266 0 88.890000175 0.7649997375000126 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 7 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf108 2.9174259258 0 87.5524999 2.7712501500000073 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -4873,7 +4873,7 @@ conf108 2.9174259258 0 87.5524999 2.7712501500000073 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 5 17 gpu add fp16 1 @@ -4882,17 +4882,17 @@ conf108 2.9174259258 0 87.5524999 2.7712501500000073 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 5 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -4900,17 +4900,17 @@ conf108 2.9174259258 0 87.5524999 2.7712501500000073 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf109 3.01926465481 0 87.6829996 2.575500600000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 5 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -4918,7 +4918,7 @@ conf109 3.01926465481 0 87.6829996 2.575500600000005 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 5 17 gpu add fp16 1 @@ -4927,17 +4927,17 @@ conf109 3.01926465481 0 87.6829996 2.575500600000005 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 5 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -4945,242 +4945,242 @@ conf109 3.01926465481 0 87.6829996 2.575500600000005 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf110 1.99031144732 0 88.994500175 0.6082497375000102 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf111 2.02165472925 0 88.853001825 0.8204972624999982 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 -16 gpu conv samp 36 add fp32 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf112 2.10323858387 0 88.4375 1.4437500000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 6 -37 gpu conv perf 25 add fp32 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf113 1.99031144732 0 88.985500375 0.6217494375000072 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf114 2.01054528261 0 88.803000075 0.8954998875000086 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 26 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf115 2.31295256497 0 88.23900015 1.7414997750000154 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 7 promise swing_level 7 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -5188,89 +5188,89 @@ conf115 2.31295256497 0 88.23900015 1.7414997750000154 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 3 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 24 promise swing_level 7 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 3 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf116 2.2393726673 0 88.3990008 1.5014988000000145 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 5 -16 gpu conv samp 36 add fp32 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 6 -37 gpu conv perf 25 add fp32 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf117 2.22017827966 0 88.36450045 1.5532493250000172 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 7 promise swing_level 7 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -5278,44 +5278,44 @@ conf117 2.22017827966 0 88.36450045 1.5532493250000172 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv fp16 1 add fp32 1 +20 gpu conv fp16 1 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 37 promise swing_level 6 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf118 2.32851933584 0 88.3809991 1.5285013500000133 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 7 promise swing_level 7 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -5323,89 +5323,89 @@ conf118 2.32851933584 0 88.3809991 1.5285013500000133 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 6 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 3 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf119 2.14155624278 0 88.3665003 1.5502495500000109 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 6 promise swing_level 6 -7 gpu conv perf 25 add fp32 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 6 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv fp16 1 add fp32 1 +20 gpu conv fp16 1 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 7 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf120 2.35671723532 0 88.155000125 1.8674998125000073 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 7 promise swing_level 7 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -5413,44 +5413,44 @@ conf120 2.35671723532 0 88.155000125 1.8674998125000073 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 33 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 3 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 3 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf121 2.28578618559 0 88.619000225 1.1714996625000182 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 7 promise swing_level 7 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -5458,44 +5458,44 @@ conf121 2.28578618559 0 88.619000225 1.1714996625000182 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf122 2.26729306488 0 88.287001 1.669498500000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -5503,9 +5503,9 @@ conf122 2.26729306488 0 88.287001 1.669498500000003 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv fp16 1 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 3 @@ -5513,124 +5513,124 @@ conf122 2.26729306488 0 88.287001 1.669498500000003 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 6 -24 gpu conv perf 26 add fp32 1 +24 gpu conv perf 26 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 23 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 23 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf123 2.10323858387 0 88.316500725 1.6252489125000125 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 6 -24 gpu conv perf 30 add fp32 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 24 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 24 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf124 2.09017172033 0 88.973500375 0.6397494375000079 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 5 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 29 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 29 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 20 promise swing_level 6 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf125 2.2942068747 0 88.31599975 1.6260003750000038 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 7 promise swing_level 7 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -5638,9 +5638,9 @@ conf125 2.2942068747 0 88.31599975 1.6260003750000038 11 promise swing_level 5 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 @@ -5651,31 +5651,31 @@ conf125 2.2942068747 0 88.31599975 1.6260003750000038 24 promise swing_level 5 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf126 2.31295256497 0 88.4110002 1.483499700000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 7 promise swing_level 7 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -5683,89 +5683,89 @@ conf126 2.31295256497 0 88.4110002 1.483499700000003 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 3 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 7 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf127 2.03054655185 0 88.532499775 1.3012503375000009 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 -20 gpu conv perf 23 add fp32 1 +20 gpu conv perf 23 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 21 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 21 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 5 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 7 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 25 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 25 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf128 2.25722949116 0 88.17049885 1.8442517250000066 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -5773,71 +5773,71 @@ conf128 2.25722949116 0 88.17049885 1.8442517250000066 11 promise swing_level 6 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 5 -16 gpu conv fp16 1 add fp32 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv perf 30 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv perf 30 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 24 promise swing_level 7 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 7 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv perf 23 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv perf 23 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf129 2.69873432123 0 87.558499725 2.762250412500002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 5 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 11 promise swing_level 5 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 5 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -5845,25 +5845,25 @@ conf129 2.69873432123 0 87.558499725 2.762250412500002 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf130 2.95482989044 0 87.818500775 2.3722488375000026 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 5 17 gpu add fp16 1 @@ -5876,13 +5876,13 @@ conf130 2.95482989044 0 87.818500775 2.3722488375000026 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -5890,17 +5890,17 @@ conf130 2.95482989044 0 87.818500775 2.3722488375000026 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf131 3.10122333317 0 87.718999725 2.5215004125000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -5908,7 +5908,7 @@ conf131 3.10122333317 0 87.718999725 2.5215004125000036 11 promise swing_level 5 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 5 17 gpu add fp16 1 @@ -5921,13 +5921,13 @@ conf131 3.10122333317 0 87.718999725 2.5215004125000036 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -5935,17 +5935,17 @@ conf131 3.10122333317 0 87.718999725 2.5215004125000036 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf132 2.95998945096 0 87.8739986 2.289002100000019 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -5953,26 +5953,26 @@ conf132 2.95998945096 0 87.8739986 2.289002100000019 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 -16 gpu conv perf 28 add fp32 1 +16 gpu conv perf 28 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -5980,17 +5980,17 @@ conf132 2.95998945096 0 87.8739986 2.289002100000019 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf133 2.97030253204 0 88.020500825 2.0692487625000098 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -5998,7 +5998,7 @@ conf133 2.97030253204 0 88.020500825 2.0692487625000098 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 5 17 gpu add fp16 1 @@ -6007,17 +6007,17 @@ conf133 2.97030253204 0 88.020500825 2.0692487625000098 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -6025,25 +6025,25 @@ conf133 2.97030253204 0 88.020500825 2.0692487625000098 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf134 2.89754619018 0 87.88999915 2.2650012750000172 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 5 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 11 promise swing_level 5 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 16 promise swing_level 7 17 gpu add fp16 1 @@ -6056,13 +6056,13 @@ conf134 2.89754619018 0 87.88999915 2.2650012750000172 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -6070,17 +6070,17 @@ conf134 2.89754619018 0 87.88999915 2.2650012750000172 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf135 2.95932440473 0 87.7799999 2.430000149999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -6088,26 +6088,26 @@ conf135 2.95932440473 0 87.7799999 2.430000149999998 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 -16 gpu conv samp 35 add fp32 1 +16 gpu conv samp 35 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 36 add fp32 1 relu fp32 1 +23 gpu conv samp 36 add fp16 1 relu fp16 1 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -6115,17 +6115,17 @@ conf135 2.95932440473 0 87.7799999 2.430000149999998 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf136 3.10431511108 0 87.810000475 2.3849992875000154 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -6133,7 +6133,7 @@ conf136 3.10431511108 0 87.810000475 2.3849992875000154 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 16 promise swing_level 5 17 gpu add fp16 1 @@ -6146,13 +6146,13 @@ conf136 3.10431511108 0 87.810000475 2.3849992875000154 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -6160,27 +6160,27 @@ conf136 3.10431511108 0 87.810000475 2.3849992875000154 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf137 2.74898757735 0 87.8414997 2.3377504500000086 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 -16 gpu conv perf 23 add fp32 1 +16 gpu conv perf 23 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 @@ -6188,16 +6188,16 @@ conf137 2.74898757735 0 87.8414997 2.3377504500000086 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 7 -24 gpu conv samp 31 add fp32 1 +24 gpu conv samp 31 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -6205,17 +6205,17 @@ conf137 2.74898757735 0 87.8414997 2.3377504500000086 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf138 3.13242103648 0 87.68000035 2.579999475000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -6223,7 +6223,7 @@ conf138 3.13242103648 0 87.68000035 2.579999475000008 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 5 17 gpu add fp16 1 @@ -6236,13 +6236,13 @@ conf138 3.13242103648 0 87.68000035 2.579999475000008 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -6250,17 +6250,17 @@ conf138 3.13242103648 0 87.68000035 2.579999475000008 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf139 2.88567199664 0 87.648499875 2.6272501875000103 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 5 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 7 promise swing_level 3 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -6268,7 +6268,7 @@ conf139 2.88567199664 0 87.648499875 2.6272501875000103 11 promise swing_level 5 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 5 17 gpu add fp16 1 @@ -6281,20 +6281,20 @@ conf139 2.88567199664 0 87.648499875 2.6272501875000103 24 promise swing_level 3 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 37 promise swing_level 6 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/data/autotuner_data/tuner_promise_confs_batch220_single.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/data/autotuner_data/tuner_promise_confs_batch220_single.txt index 902931e686..714c965a13 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/data/autotuner_data/tuner_promise_confs_batch220_single.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/data/autotuner_data/tuner_promise_confs_batch220_single.txt @@ -45,200 +45,200 @@ conf1 1 0 89.4 0 ----- +++++ conf1 1.93605418386 0 88.9565002 0.6652497000000182 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 6 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 +11 gpu conv fp16 1 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 4 20 promise swing_level 6 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv fp16 1 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv fp16 1 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf2 2.01660790316 0 89.016001275 0.5759980875000181 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 4 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 +11 gpu conv fp16 1 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 -16 gpu conv samp 36 add fp32 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 7 20 promise swing_level 4 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv fp16 1 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv fp16 1 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv fp16 1 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv fp16 1 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf3 1.93816949765 0 88.9170002 0.7244997000000026 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 +11 gpu conv fp16 1 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 20 promise swing_level 6 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv fp16 1 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv fp16 1 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf4 1.95244267674 0 88.88800135 0.7679979750000143 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 6 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 4 -11 gpu conv fp16 1 add fp32 1 +11 gpu conv fp16 1 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 -15 gpu conv fp16 1 add fp32 1 -16 gpu conv samp 36 add fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 +15 gpu conv fp16 1 add fp16 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 4 20 promise swing_level 6 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv perf 29 add fp32 1 relu fp32 1 -24 gpu conv perf 30 add fp32 1 +23 gpu conv perf 29 add fp16 1 relu fp16 1 +24 gpu conv perf 30 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 -37 gpu conv fp16 1 add fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 +37 gpu conv fp16 1 add fp16 1 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf5 2.51058832101 0 88.21949995 1.7707500750000094 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 6 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 16 promise swing_level 7 17 gpu add fp16 1 @@ -251,13 +251,13 @@ conf5 2.51058832101 0 88.21949995 1.7707500750000094 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 7 @@ -265,44 +265,44 @@ conf5 2.51058832101 0 88.21949995 1.7707500750000094 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf6 2.40167508488 0 88.157000325 1.8644995125000108 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 6 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 11 promise swing_level 6 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 6 16 promise swing_level 7 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 -20 gpu conv fp16 1 add fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 +20 gpu conv fp16 1 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 6 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 5 29 promise swing_level 6 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 7 @@ -310,25 +310,25 @@ conf6 2.40167508488 0 88.157000325 1.8644995125000108 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf7 2.53821618636 0 88.19 1.815000000000012 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 6 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 16 promise swing_level 7 17 gpu add fp16 1 @@ -341,13 +341,13 @@ conf7 2.53821618636 0 88.19 1.815000000000012 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 7 @@ -355,30 +355,30 @@ conf7 2.53821618636 0 88.19 1.815000000000012 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf8 2.37827715176 0 88.092999475 1.9605007875000098 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 6 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 5 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 16 promise swing_level 7 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 30 add fp32 1 relu fp32 1 +19 gpu conv perf 30 add fp16 1 relu fp16 1 20 promise swing_level 5 21 gpu add fp16 1 22 gpu relu fp16 1 @@ -386,41 +386,41 @@ conf8 2.37827715176 0 88.092999475 1.9605007875000098 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 7 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 37 promise swing_level 5 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf9 2.5355068429 0 88.159500675 1.8607489875000027 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 6 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 6 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 36 add fp32 1 +16 gpu conv samp 36 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 19 promise swing_level 5 @@ -431,44 +431,44 @@ conf9 2.5355068429 0 88.159500675 1.8607489875000027 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 6 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 37 promise swing_level 6 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf10 3.04306539985 0 87.584999525 2.7225007125000076 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 4 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 20 promise swing_level 7 21 gpu add fp16 1 22 gpu relu fp16 1 @@ -476,12 +476,12 @@ conf10 3.04306539985 0 87.584999525 2.7225007125000076 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 5 -29 gpu conv fp16 1 add fp32 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -490,17 +490,17 @@ conf10 3.04306539985 0 87.584999525 2.7225007125000076 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf11 3.27739736367 0 87.643499625 2.6347505625000025 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 6 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 6 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -508,12 +508,12 @@ conf11 3.27739736367 0 87.643499625 2.6347505625000025 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 @@ -521,12 +521,12 @@ conf11 3.27739736367 0 87.643499625 2.6347505625000025 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 6 29 promise swing_level 4 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -535,43 +535,43 @@ conf11 3.27739736367 0 87.643499625 2.6347505625000025 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf12 2.9586513497 0 87.53199885 2.802001725000018 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 6 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv fp16 1 add fp32 1 +16 gpu conv fp16 1 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 6 29 promise swing_level 4 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -580,30 +580,30 @@ conf12 2.9586513497 0 87.53199885 2.802001725000018 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf13 2.95055223818 0 87.5959999 2.7060001500000155 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 3 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 6 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 @@ -611,13 +611,13 @@ conf13 2.95055223818 0 87.5959999 2.7060001500000155 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 4 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 4 @@ -625,17 +625,17 @@ conf13 2.95055223818 0 87.5959999 2.7060001500000155 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf14 3.06451291352 0 87.55650045 2.7652493250000063 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 4 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -643,12 +643,12 @@ conf14 3.06451291352 0 87.55650045 2.7652493250000063 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 @@ -656,12 +656,12 @@ conf14 3.06451291352 0 87.55650045 2.7652493250000063 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 4 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -670,30 +670,30 @@ conf14 3.06451291352 0 87.55650045 2.7652493250000063 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf15 2.84800200683 0 87.95849955 2.16225067500001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 7 8 gpu add fp16 1 9 gpu relu fp16 1 -10 gpu conv perf 30 add fp32 1 relu fp32 1 +10 gpu conv perf 30 add fp16 1 relu fp16 1 11 promise swing_level 4 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 @@ -701,13 +701,13 @@ conf15 2.84800200683 0 87.95849955 2.16225067500001 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 5 29 promise swing_level 4 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 4 @@ -715,17 +715,17 @@ conf15 2.84800200683 0 87.95849955 2.16225067500001 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf16 3.03464677656 0 87.531499925 2.8027501125 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 4 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -733,12 +733,12 @@ conf16 3.03464677656 0 87.531499925 2.8027501125 11 promise swing_level 4 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 @@ -746,31 +746,31 @@ conf16 3.03464677656 0 87.531499925 2.8027501125 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 5 29 promise swing_level 4 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 -36 gpu conv fp16 1 add fp32 1 relu fp32 1 +36 gpu conv fp16 1 add fp16 1 relu fp16 1 37 promise swing_level 5 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf17 2.94480154212 0 87.555499875 2.7667501875000156 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 6 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 6 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -778,12 +778,12 @@ conf17 2.94480154212 0 87.555499875 2.7667501875000156 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 5 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 @@ -791,12 +791,12 @@ conf17 2.94480154212 0 87.555499875 2.7667501875000156 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -805,17 +805,17 @@ conf17 2.94480154212 0 87.555499875 2.7667501875000156 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf18 2.81580889712 0 87.945499825 2.1817502625000103 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 4 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -823,26 +823,26 @@ conf18 2.81580889712 0 87.945499825 2.1817502625000103 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 7 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 29 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 29 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 5 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 4 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 3 @@ -850,17 +850,17 @@ conf18 2.81580889712 0 87.945499825 2.1817502625000103 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf19 2.74795523388 0 87.89949895 2.2507515750000167 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 6 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 6 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -868,25 +868,25 @@ conf19 2.74795523388 0 87.89949895 2.2507515750000167 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 29 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 29 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 7 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -895,43 +895,43 @@ conf19 2.74795523388 0 87.89949895 2.2507515750000167 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf20 2.64693797866 0 87.641500225 2.6377496625000063 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 6 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 6 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 29 add fp32 1 +11 gpu conv perf 29 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 25 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 25 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 7 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -940,17 +940,17 @@ conf20 2.64693797866 0 87.641500225 2.6377496625000063 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf21 2.80615190473 0 87.6549991 2.617501350000012 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 6 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 6 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -958,25 +958,25 @@ conf21 2.80615190473 0 87.6549991 2.617501350000012 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 29 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 29 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 3 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 7 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -985,43 +985,43 @@ conf21 2.80615190473 0 87.6549991 2.617501350000012 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf22 2.45193895003 0 87.686001175 2.5709982375000067 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 24 add fp32 1 +11 gpu conv perf 24 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 29 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 29 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 7 -24 gpu conv samp 31 add fp32 1 +24 gpu conv samp 31 add fp16 1 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -1030,30 +1030,30 @@ conf22 2.45193895003 0 87.686001175 2.5709982375000067 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf23 2.93663883728 0 87.5605003 2.759249550000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 4 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 4 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 @@ -1061,12 +1061,12 @@ conf23 2.93663883728 0 87.5605003 2.759249550000007 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 4 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -1075,30 +1075,30 @@ conf23 2.93663883728 0 87.5605003 2.759249550000007 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf24 2.80615190473 0 87.73149945 2.502750825000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 6 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 @@ -1106,12 +1106,12 @@ conf24 2.80615190473 0 87.73149945 2.502750825000007 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -1120,17 +1120,17 @@ conf24 2.80615190473 0 87.73149945 2.502750825000007 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf25 2.71381423967 0 87.960499925 2.1592501125000183 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 6 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 6 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -1138,12 +1138,12 @@ conf25 2.71381423967 0 87.960499925 2.1592501125000183 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 @@ -1151,13 +1151,13 @@ conf25 2.71381423967 0 87.960499925 2.1592501125000183 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 -33 gpu conv fp16 1 add fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 +33 gpu conv fp16 1 add fp16 1 34 gpu add fp16 1 35 gpu relu fp16 1 36 promise swing_level 5 @@ -1165,17 +1165,17 @@ conf25 2.71381423967 0 87.960499925 2.1592501125000183 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf26 3.04205860198 0 87.6664993 2.6002510500000113 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 4 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -1183,12 +1183,12 @@ conf26 3.04205860198 0 87.6664993 2.6002510500000113 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 @@ -1196,12 +1196,12 @@ conf26 3.04205860198 0 87.6664993 2.6002510500000113 24 promise swing_level 5 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 4 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -1210,17 +1210,17 @@ conf26 3.04205860198 0 87.6664993 2.6002510500000113 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf27 3.31578734525 0 87.7494991 2.4757513500000172 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 4 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -1228,12 +1228,12 @@ conf27 3.31578734525 0 87.7494991 2.4757513500000172 11 promise swing_level 4 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 @@ -1241,12 +1241,12 @@ conf27 3.31578734525 0 87.7494991 2.4757513500000172 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 6 29 promise swing_level 4 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -1255,17 +1255,17 @@ conf27 3.31578734525 0 87.7494991 2.4757513500000172 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf28 2.85191471569 0 87.760499975 2.4592500375000057 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 4 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -1273,25 +1273,25 @@ conf28 2.85191471569 0 87.760499975 2.4592500375000057 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 7 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 4 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -1300,43 +1300,43 @@ conf28 2.85191471569 0 87.760499975 2.4592500375000057 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf29 2.62750656186 0 87.572500275 2.7412495875000147 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 29 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 29 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 7 24 promise swing_level 7 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 6 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -1345,17 +1345,17 @@ conf29 2.62750656186 0 87.572500275 2.7412495875000147 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf30 3.28980519592 0 87.613999575 2.6790006375000175 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 4 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -1363,12 +1363,12 @@ conf30 3.28980519592 0 87.613999575 2.6790006375000175 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 @@ -1376,12 +1376,12 @@ conf30 3.28980519592 0 87.613999575 2.6790006375000175 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 -29 gpu conv fp16 1 add fp32 1 +29 gpu conv fp16 1 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -1390,43 +1390,43 @@ conf30 3.28980519592 0 87.613999575 2.6790006375000175 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf31 2.70851546456 0 87.500500375 2.8492494375000064 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 6 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 6 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 23 add fp32 1 +11 gpu conv perf 23 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 -23 gpu conv samp 33 add fp32 1 relu fp32 1 +23 gpu conv samp 33 add fp16 1 relu fp16 1 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 5 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -1435,43 +1435,43 @@ conf31 2.70851546456 0 87.500500375 2.8492494375000064 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf32 2.68618832133 0 87.63849965 2.6422505250000086 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 -3 gpu conv fp16 1 add fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 4 gpu add fp16 1 5 gpu relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 +7 gpu conv perf 22 add fp16 1 8 gpu add fp16 1 9 gpu relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 26 add fp32 1 +11 gpu conv perf 26 add fp16 1 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 -20 gpu conv perf 24 add fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 +20 gpu conv perf 24 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 7 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 5 29 promise swing_level 3 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -1480,17 +1480,17 @@ conf32 2.68618832133 0 87.63849965 2.6422505250000086 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf33 3.09189955591 0 87.6530001 2.6204998500000087 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 6 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 4 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -1498,12 +1498,12 @@ conf33 3.09189955591 0 87.6530001 2.6204998500000087 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 @@ -1511,12 +1511,12 @@ conf33 3.09189955591 0 87.6530001 2.6204998500000087 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 29 promise swing_level 4 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -1525,17 +1525,17 @@ conf33 3.09189955591 0 87.6530001 2.6204998500000087 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf34 2.96583807582 0 87.583499625 2.724750562500006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 6 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 6 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -1543,12 +1543,12 @@ conf34 2.96583807582 0 87.583499625 2.724750562500006 11 promise swing_level 7 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 @@ -1556,12 +1556,12 @@ conf34 2.96583807582 0 87.583499625 2.724750562500006 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 -28 gpu conv fp16 1 add fp32 1 -29 gpu conv samp 36 add fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 +28 gpu conv fp16 1 add fp16 1 +29 gpu conv samp 36 add fp16 1 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -1570,17 +1570,17 @@ conf34 2.96583807582 0 87.583499625 2.724750562500006 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf35 3.13864816784 0 87.71199925 2.5320011250000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 6 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 4 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -1588,25 +1588,25 @@ conf35 3.13864816784 0 87.71199925 2.5320011250000007 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 25 add fp32 1 relu fp32 1 -20 gpu conv samp 32 add fp32 1 +19 gpu conv perf 25 add fp16 1 relu fp16 1 +20 gpu conv samp 32 add fp16 1 21 gpu add fp16 1 22 gpu relu fp16 1 23 promise swing_level 7 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 4 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 5 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -1615,17 +1615,17 @@ conf35 3.13864816784 0 87.71199925 2.5320011250000007 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- +++++ conf36 3.28257993297 0 87.6709994 2.5935009000000093 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 3 promise swing_level 7 4 gpu add fp16 1 5 gpu relu fp16 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 7 promise swing_level 5 8 gpu add fp16 1 9 gpu relu fp16 1 @@ -1633,12 +1633,12 @@ conf36 3.28257993297 0 87.6709994 2.5935009000000093 11 promise swing_level 3 12 gpu add fp16 1 13 gpu relu fp16 1 -14 gpu conv fp16 1 add fp32 1 relu fp32 1 +14 gpu conv fp16 1 add fp16 1 relu fp16 1 15 promise swing_level 4 -16 gpu conv samp 33 add fp32 1 +16 gpu conv samp 33 add fp16 1 17 gpu add fp16 1 18 gpu relu fp16 1 -19 gpu conv perf 28 add fp32 1 relu fp32 1 +19 gpu conv perf 28 add fp16 1 relu fp16 1 20 promise swing_level 3 21 gpu add fp16 1 22 gpu relu fp16 1 @@ -1646,12 +1646,12 @@ conf36 3.28257993297 0 87.6709994 2.5935009000000093 24 promise swing_level 4 25 gpu add fp16 1 26 gpu relu fp16 1 -27 gpu conv fp16 1 add fp32 1 relu fp32 1 +27 gpu conv fp16 1 add fp16 1 relu fp16 1 28 promise swing_level 7 29 promise swing_level 4 30 gpu add fp16 1 31 gpu relu fp16 1 -32 gpu conv fp16 1 add fp32 1 relu fp32 1 +32 gpu conv fp16 1 add fp16 1 relu fp16 1 33 promise swing_level 6 34 gpu add fp16 1 35 gpu relu fp16 1 @@ -1660,6 +1660,6 @@ conf36 3.28257993297 0 87.6709994 2.5935009000000093 38 gpu add fp16 1 39 gpu relu fp16 1 40 gpu pool_max fp16 1 -41 gpu mul fp16 1 add fp32 1 -42 gpu softmax fp32 1 +41 gpu mul fp16 1 add fp16 1 +42 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/data/autotuner_data/tuner_confs_batch220.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/data/autotuner_data/tuner_confs_batch220.txt index 921e476fdc..ef02c06536 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/data/autotuner_data/tuner_confs_batch220.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/data/autotuner_data/tuner_confs_batch220.txt @@ -19,7981 +19,7981 @@ conf1 1 0 89.22 0 ----- +++++ conf1 1.76425623358 0 88.940002 0.679997999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf2 1.80065893786 0 88.639999 0.8700014999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 30 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 30 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf3 1.81931170254 0 88.699997 0.780004500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 28 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 28 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf4 1.79216077981 0 88.619995 0.9000074999999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf5 1.79216077981 0 88.639999 0.8700014999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf6 1.77922486006 0 88.680008 0.809987999999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf7 1.79216077981 0 88.620003 0.8999955000000028 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf8 1.77092734677 0 88.62001 0.899985000000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf9 1.82735666462 0 89.259995 0.36000499999999536 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 28 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 28 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf10 1.8731571451 0 88.900009 0.47998650000000254 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf11 1.76714086835 0 88.580002 0.9599970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf12 1.75456235014 0 88.659996 0.8400059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf13 1.73206757651 0 88.660004 0.8399939999999972 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf14 1.85485798124 0 88.659996 0.8400059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 28 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 28 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf15 1.79216077981 0 88.639999 0.8700014999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf16 1.8731571451 0 88.860001 0.5399985000000029 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf17 1.80065893786 0 88.659996 0.8400059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf18 1.76714086835 0 88.679993 0.8100105000000042 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf19 1.72420310346 0 88.579994 0.9600089999999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf20 1.82735666462 0 88.779999 0.6600014999999928 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv samp 33 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv samp 33 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf21 1.72420310346 0 88.600006 0.9299910000000082 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 36 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 36 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf22 1.83691289312 0 89.059998 0.5600020000000058 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 29 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 29 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf23 1.77922486006 0 88.579994 0.9600089999999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf24 1.80065893786 0 88.699997 0.780004500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf25 1.74953736809 0 88.580002 0.9599970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf26 1.71787873983 0 88.599998 0.9300029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf27 1.80065893786 0 88.580002 0.9599970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf28 1.71222635014 0 88.720001 0.7499985000000038 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv fp16 1 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv fp16 1 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf29 1.77473008673 0 88.719994 0.7500089999999986 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf30 1.77922486006 0 88.580002 0.9599970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf31 1.73420544027 0 88.599998 0.9300029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf32 1.69835942298 0 88.680008 0.809987999999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf33 1.80065893786 0 88.699997 0.780004500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf34 1.79216077981 0 88.599998 0.9300029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf35 1.80065893786 0 88.639999 0.8700014999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf36 1.85485798124 0 88.580002 0.9599970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 25 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 28 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 25 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 28 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf37 1.84997347341 0 88.759995 0.690007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 35 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 35 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf38 1.84584182312 0 89.180008 0.43999199999999805 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf39 1.77092734677 0 88.62001 0.899985000000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf40 1.78374245859 0 88.68 0.8099999999999881 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf41 1.79216077981 0 88.639999 0.8700014999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf42 1.76580832719 0 88.800003 0.6299954999999926 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 36 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 36 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf43 1.71222635014 0 88.959999 0.6600010000000026 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv fp16 1 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv fp16 1 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf44 1.79605535648 0 88.719994 0.7500089999999986 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 36 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 36 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf45 1.80065893786 0 88.580002 0.9599970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf46 1.67387706831 0 88.599991 0.930013499999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv fp16 1 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv fp16 1 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf47 1.73785194592 0 88.620003 0.8999955000000028 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 36 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 36 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf48 1.77922486006 0 88.720001 0.7499985000000038 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf49 1.69079740767 0 88.599998 0.9300029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf50 1.71787873983 0 88.720009 0.7499864999999915 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf51 1.7464927509 0 88.580002 0.9599970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv fp16 1 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv fp16 1 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf52 1.8731571451 0 88.900009 0.47998650000000254 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf53 1.77922486006 0 88.660004 0.8399939999999972 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf54 1.80065893786 0 88.599998 0.9300029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf55 1.80993726378 0 88.599998 0.9300029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 28 add fp32 1 relu fp32 1 -7 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 28 add fp16 1 relu fp16 1 +7 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf56 1.84100462354 0 88.659996 0.8400059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 25 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 28 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 25 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 28 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf57 1.76714086835 0 88.599998 0.9300029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf58 1.71787873983 0 88.639999 0.8700014999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf59 1.76270686606 0 88.599998 0.9300029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf60 1.75390528134 0 89.019997 0.6000029999999953 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf61 1.85485798124 0 88.660004 0.8399939999999972 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 28 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 28 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf62 1.71640972506 0 88.819992 0.6000119999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf63 1.73206757651 0 88.640007 0.8699895000000026 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf64 1.76714086835 0 88.699997 0.780004500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf65 1.7464927509 0 88.68 0.8099999999999881 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf66 1.75895542178 0 88.620003 0.8999955000000028 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf67 1.72568549443 0 89.019997 0.6000029999999953 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 35 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 35 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf68 1.80065893786 0 88.639999 0.8700014999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf69 1.77473008673 0 89.040001 0.5799989999999952 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf70 1.85485798124 0 88.619995 0.9000074999999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 28 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 28 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf71 1.72632158543 0 88.620003 0.8999955000000028 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 36 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 36 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf72 1.8731571451 0 88.940002 0.679997999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf73 1.81931170254 0 88.599998 0.9300029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 28 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 28 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf74 1.77473008673 0 88.979996 0.640003999999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf75 1.85485798124 0 88.659996 0.8400059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 28 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 28 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf76 1.72420310346 0 88.659996 0.8400059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf77 1.74801373375 0 88.900002 0.47999699999999734 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf78 1.80065893786 0 88.580002 0.9599970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf79 1.7464927509 0 88.659996 0.8400059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf80 1.80065893786 0 88.639999 0.8700014999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf81 1.77922486006 0 88.619995 0.9000074999999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf82 1.75829505914 0 88.580002 0.9599970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf83 1.69426346779 0 88.659996 0.8400059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 36 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 36 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf84 1.77922486006 0 88.599998 0.9300029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv fp16 1 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv fp16 1 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf85 1.80065893786 0 88.579994 0.9600089999999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf86 1.78760049381 0 88.759995 0.690007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 36 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 36 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf87 1.82735666462 0 88.599998 0.9300029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 26 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 26 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf88 1.73206757651 0 88.759995 0.690007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf89 1.76580832719 0 88.979996 0.640003999999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf90 1.94095332873 0 88.219994 1.5000089999999986 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 29 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 29 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf91 1.82664395417 0 88.259995 1.440007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf92 2.01384137222 0 87.960007 1.8899894999999916 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf93 1.96182128461 0 88.220001 1.4999985000000038 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf94 1.90129305101 0 88.340004 1.3199940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf95 1.93028708168 0 88.240005 1.4699925000000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf96 1.99270576133 0 88.219994 1.5000089999999986 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 24 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 24 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf97 1.95635801766 0 88.080002 1.7099970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf98 1.91685645364 0 88.199997 1.530004500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 29 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 29 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf99 1.8731571451 0 87.959999 1.8900015000000039 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf100 1.99270576133 0 87.940002 1.919996999999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf101 1.93108298212 0 88.160004 1.5899939999999972 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf102 2.00236135985 0 88.020004 1.799993999999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf103 1.91086046154 0 88.259995 1.440007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf104 1.95173810837 0 87.980003 1.8599955000000037 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf105 1.97200918641 0 88.160004 1.5899939999999972 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf106 1.91086046154 0 88.300003 1.3799954999999926 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf107 2.01384137222 0 88.300003 1.3799954999999926 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf108 1.93028708168 0 88.259995 1.440007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf109 2.03543013873 0 88.18 1.559999999999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf110 1.95173810837 0 88.159996 1.5900059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf111 1.90645269504 0 88.0 1.8299999999999983 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf112 1.8731571451 0 88.419998 1.2000029999999882 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf113 1.79284683027 0 88.220009 1.4999864999999915 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 28 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 28 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf114 2.03543013873 0 88.240005 1.4699925000000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf115 2.03543013873 0 88.239998 1.4700029999999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf116 1.99270576133 0 88.040001 1.7699984999999927 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf117 1.83691289312 0 88.300003 1.3799954999999926 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf118 1.90206521647 0 88.099998 1.6800029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf119 2.00808495866 0 88.300003 1.3799954999999926 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv fp16 1 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv fp16 1 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf120 1.90645269504 0 88.139999 1.6200014999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf121 1.99270576133 0 87.939995 1.920007500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 29 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 29 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf122 2.02457820502 0 88.300003 1.3799954999999926 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf123 2.00321781877 0 88.340004 1.3199940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf124 1.99270576133 0 88.160004 1.5899939999999972 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf125 1.8731571451 0 88.120003 1.6499955000000028 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf126 1.82735666462 0 88.239998 1.4700029999999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf127 1.93907823442 0 87.960007 1.8899894999999916 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 33 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 33 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf128 1.99270576133 0 87.979996 1.8600059999999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf129 1.99270576133 0 88.020004 1.799993999999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf130 1.98706937371 0 88.279999 1.4100014999999928 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv fp16 1 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv fp16 1 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf131 1.8731571451 0 88.540001 1.0199984999999927 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 24 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 24 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf132 2.00321781877 0 88.379997 1.2600044999999938 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf133 2.02457820502 0 88.220001 1.4999985000000038 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf134 1.94633077883 0 88.340004 1.3199940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf135 1.95635801766 0 87.980003 1.8599955000000037 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf136 1.89182096856 0 88.479996 1.1100059999999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf137 1.86149312771 0 88.059998 1.7400030000000086 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf138 2.03543013873 0 88.160004 1.5899939999999972 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf139 2.00321781877 0 88.379997 1.2600044999999938 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf140 1.94095332873 0 88.059998 1.7400030000000086 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 35 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 35 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf141 1.91086046154 0 88.360001 1.289998500000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf142 1.94095332873 0 88.120003 1.6499955000000028 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf143 2.03543013873 0 88.32 1.3500000000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf144 1.98230345381 0 88.139999 1.6200014999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf145 1.89872368793 0 88.119995 1.6500074999999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf146 1.91086046154 0 88.280006 1.409990999999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf147 2.01384137222 0 88.37999 1.2600149999999886 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf148 1.95173810837 0 87.959999 1.8900015000000039 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 24 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 24 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf149 1.90129305101 0 88.099998 1.6800029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf150 1.8799240947 0 87.940002 1.919996999999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf151 1.98146478132 0 88.000008 1.8299880000000073 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf152 2.02457820502 0 88.300003 1.3799954999999926 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf153 2.01384137222 0 87.960007 1.8899894999999916 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf154 1.89616125983 0 88.220009 1.4999864999999915 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf155 2.03543013873 0 88.019997 1.800004499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf156 1.99270576133 0 88.080002 1.7099970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf157 1.8824427965 0 88.040001 1.7699984999999927 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 33 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 33 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf158 1.99270576133 0 88.040001 1.7699984999999927 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf159 1.94095332873 0 88.080002 1.7099970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 34 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 34 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf160 1.88168647759 0 88.460007 1.1399894999999916 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf161 1.9205246465 0 88.099998 1.6800029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf162 1.8731571451 0 88.300003 1.3799954999999926 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf163 2.01384137222 0 88.199997 1.530004500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf164 1.98230345381 0 88.019997 1.800004499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf165 1.96099985017 0 88.220009 1.4999864999999915 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf166 1.91086046154 0 88.080002 1.7099970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf167 1.94095332873 0 88.000008 1.8299880000000073 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf168 1.98230345381 0 88.159996 1.5900059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf169 1.91086046154 0 88.259995 1.440007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf170 2.03543013873 0 88.019997 1.800004499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf171 1.90645269504 0 87.979996 1.8600059999999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 35 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 35 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf172 1.91086046154 0 88.259995 1.440007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf173 1.99270576133 0 88.199997 1.530004500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf174 2.01384137222 0 88.139999 1.6200014999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf175 1.99270576133 0 88.159996 1.5900059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf176 1.87891850018 0 88.18 1.559999999999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf177 2.03543013873 0 88.220001 1.4999985000000038 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf178 1.98146478132 0 88.220001 1.4999985000000038 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf179 1.90645269504 0 87.959999 1.8900015000000039 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf180 1.97200918641 0 87.959999 1.8900015000000039 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 28 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 28 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf181 1.93028708168 0 88.300003 1.3799954999999926 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf182 1.97951063586 0 87.959999 1.8900015000000039 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 33 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 33 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf183 1.90129305101 0 88.0 1.8299999999999983 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 25 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 25 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf184 1.81789935087 0 88.180008 1.559987999999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf185 1.96924527959 0 88.0 1.8299999999999983 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 33 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 33 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf186 1.91086046154 0 88.159996 1.5900059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf187 1.93108298212 0 88.120003 1.6499955000000028 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf188 1.95635801766 0 88.119995 1.6500074999999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf189 1.91086046154 0 88.280006 1.409990999999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf190 1.96924527959 0 88.0 1.8299999999999983 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 33 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 33 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf191 1.80065893786 0 88.099998 1.6800029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf192 1.96099985017 0 87.939995 1.920007500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 35 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 35 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf193 1.89182096856 0 88.0 1.8299999999999983 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv fp16 1 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv fp16 1 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf194 1.86396265195 0 88.120003 1.6499955000000028 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf195 1.88168647759 0 88.400002 1.2299969999999973 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf196 2.02457820502 0 88.32 1.3500000000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf197 2.03543013873 0 88.139999 1.6200014999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf198 1.94095332873 0 88.040001 1.7699984999999927 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf199 1.94633077883 0 88.059998 1.7400030000000086 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf200 1.99270576133 0 88.119995 1.6500074999999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf201 1.93028708168 0 88.080002 1.7099970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 25 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 25 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf202 1.94633077883 0 88.279999 1.4100014999999928 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv fp16 1 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv fp16 1 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf203 1.86396265195 0 88.120003 1.6499955000000028 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf204 1.89872368793 0 88.280006 1.409990999999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 34 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 34 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf205 1.95173810837 0 88.020004 1.799993999999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf206 1.98230345381 0 88.099998 1.6800029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf207 2.01384137222 0 88.32 1.3500000000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf208 1.96099985017 0 88.159996 1.5900059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf209 2.03543013873 0 88.099991 1.680013499999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf210 1.95635801766 0 88.120003 1.6499955000000028 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf211 1.95635801766 0 88.180008 1.559987999999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf212 1.8824427965 0 88.160004 1.5899939999999972 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf213 1.78374245859 0 88.020004 1.799993999999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf214 1.85657365827 0 88.180008 1.559987999999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 33 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 33 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf215 1.93108298212 0 88.019997 1.800004499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf216 1.8731571451 0 88.099998 1.6800029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf217 1.94095332873 0 88.279999 1.4100014999999928 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf218 1.91086046154 0 88.340004 1.3199940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf219 1.92131251512 0 88.400002 1.2299969999999973 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf220 1.98146478132 0 88.199997 1.530004500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf221 1.99270576133 0 88.119995 1.6500074999999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf222 1.94095332873 0 87.959999 1.8900015000000039 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf223 2.01384137222 0 88.019997 1.800004499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 24 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 24 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf224 1.98230345381 0 88.119995 1.6500074999999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf225 1.8824427965 0 88.139999 1.6200014999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf226 1.91790307812 0 88.120003 1.6499955000000028 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 34 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 34 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf227 1.80065893786 0 89.120003 0.4999970000000019 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf228 2.02457820502 0 88.240005 1.4699925000000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf229 1.93028708168 0 88.159996 1.5900059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf230 1.9205246465 0 88.260002 1.4399969999999982 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf231 1.96099985017 0 88.159996 1.5900059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf232 2.01384137222 0 88.340004 1.3199940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf233 1.89872368793 0 88.159996 1.5900059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf234 1.97951063586 0 87.940002 1.919996999999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 33 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 33 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf235 1.91790307812 0 88.040001 1.7699984999999927 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 36 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 36 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf236 1.91086046154 0 88.300003 1.3799954999999926 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf237 1.90645269504 0 88.18 1.559999999999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf238 1.98230345381 0 88.159996 1.5900059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf239 1.81789935087 0 88.259995 1.440007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf240 1.97200918641 0 88.240005 1.4699925000000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf241 1.88168647759 0 88.120003 1.6499955000000028 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 35 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 35 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf242 1.96099985017 0 88.139999 1.6200014999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 34 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 34 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf243 1.93108298212 0 88.159996 1.5900059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf244 1.95173810837 0 88.159996 1.5900059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 24 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 24 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf245 1.91086046154 0 88.0 1.8299999999999983 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 30 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 30 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf246 1.86322110696 0 88.120003 1.6499955000000028 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf247 1.91685645364 0 88.199997 1.530004500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf248 1.85903013845 0 88.199997 1.530004500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf249 1.9265815376 0 87.939995 1.920007500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf250 2.03543013873 0 88.219994 1.5000089999999986 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf251 1.9265815376 0 88.059998 1.7400030000000086 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf252 2.02457820502 0 88.059998 1.7400030000000086 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf253 1.97200918641 0 88.279999 1.4100014999999928 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf254 1.94175805117 0 87.979996 1.8600059999999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf255 1.99270576133 0 88.040001 1.7699984999999927 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf256 1.91790307812 0 88.300003 1.3799954999999926 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 34 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 34 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf257 1.99270576133 0 88.220001 1.4999985000000038 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf258 2.00236135985 0 89.080002 0.5399980000000056 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf259 1.91086046154 0 88.340004 1.3199940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf260 1.95635801766 0 87.920006 1.9499909999999971 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf261 2.01384137222 0 88.240005 1.4699925000000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf262 2.01384137222 0 87.959999 1.8900015000000039 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 24 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 24 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf263 1.99270576133 0 88.0 1.8299999999999983 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf264 1.86470478744 0 88.559998 0.9900030000000086 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf265 1.97200918641 0 88.200005 1.5299924999999917 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf266 1.95173810837 0 88.040001 1.7699984999999927 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 24 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 24 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf267 1.97200918641 0 88.199997 1.530004500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf268 1.91242101403 0 88.019997 1.800004499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 29 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 29 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf269 1.93028708168 0 88.280006 1.409990999999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf270 2.00808495866 0 88.200005 1.5299924999999917 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf271 1.97951063586 0 88.040001 1.7699984999999927 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 33 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 33 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf272 1.97200918641 0 88.279999 1.4100014999999928 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 24 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 24 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf273 2.02457820502 0 87.400002 2.7299969999999973 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf274 1.90283800938 0 87.260002 2.939996999999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf275 2.02457820502 0 87.340004 2.8199940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf276 1.98146478132 0 87.380005 2.7599925000000027 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf277 1.79056203543 0 87.699997 2.280004500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 36 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 33 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 36 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 33 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf278 2.03543013873 0 88.040001 1.7699984999999927 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf279 1.915288657 0 87.800003 2.1299954999999926 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 33 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 33 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf280 1.84656959634 0 87.32 2.8500000000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv perf 24 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv perf 24 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf281 1.99270576133 0 87.979996 1.8600059999999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf282 1.90206521647 0 87.560013 2.4899805000000015 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 24 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 24 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf283 1.95011275736 0 87.599998 2.4300029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf284 1.78510221508 0 87.860001 2.039998500000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf285 2.03543013873 0 87.620003 2.399995500000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf286 1.8731571451 0 87.260002 2.939996999999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 28 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 28 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf287 1.98146478132 0 87.519997 2.550004499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf288 1.8799240947 0 87.360001 2.789998500000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 36 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 36 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf289 1.86470478744 0 87.379997 2.7600044999999938 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv perf 24 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv perf 24 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf290 1.93028708168 0 87.759995 2.190007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf291 1.89182096856 0 87.459999 2.640001500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf292 2.00808495866 0 87.419998 2.700002999999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf293 1.95011275736 0 87.919998 1.9500029999999882 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf294 2.03543013873 0 87.300003 2.8799954999999926 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf295 1.91685645364 0 87.440002 2.669996999999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf296 1.85632836726 0 87.360001 2.789998500000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf297 1.95011275736 0 87.840004 2.0699940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf298 2.03543013873 0 87.340004 2.8199940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf299 2.03543013873 0 87.419998 2.700002999999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf300 1.93028708168 0 87.920006 1.9499909999999971 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf301 1.94014927301 0 87.599998 2.4300029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf302 1.89335056537 0 87.220001 2.999998500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf303 1.83763364104 0 87.479996 2.6100059999999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf304 1.86470478744 0 87.259995 2.940007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv perf 24 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv perf 24 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf305 1.94552226256 0 87.400002 2.7299969999999973 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf306 1.89182096856 0 87.919998 1.9500029999999882 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf307 1.86470478744 0 87.379997 2.7600044999999938 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv perf 24 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv perf 24 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf308 1.91086046154 0 87.82 2.1000000000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf309 1.92499779889 0 87.739998 2.2200029999999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 33 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 33 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf310 1.98230345381 0 87.37999 2.7600149999999886 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf311 1.9265815376 0 87.860001 2.039998500000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv fp16 1 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv fp16 1 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf312 1.87741212392 0 87.459999 2.640001500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 33 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 33 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf313 1.91086046154 0 87.620003 2.399995500000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf314 1.90052151224 0 88.339996 1.3200059999999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf315 1.8219067218 0 87.900002 1.9799969999999973 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 33 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 33 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf316 1.78510221508 0 87.480003 2.6099955000000037 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 29 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 29 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf317 1.93028708168 0 87.659996 2.3400059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 28 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 28 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf318 1.99270576133 0 88.32 1.3500000000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf319 1.93187953917 0 87.440002 2.669996999999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf320 2.01384137222 0 87.339996 2.8200059999999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 29 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 29 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf321 1.93028708168 0 88.040009 1.7699865000000017 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf322 1.95011275736 0 87.540001 2.5199984999999927 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf323 1.99270576133 0 87.599998 2.4300029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 25 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 25 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf324 1.82806993146 0 87.44001 2.669984999999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv perf 24 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv perf 24 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf325 1.99270576133 0 87.599998 2.4300029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf326 1.98230345381 0 87.619995 2.400007499999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf327 1.89182096856 0 87.699997 2.280004500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf328 1.98146478132 0 87.440002 2.669996999999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf329 1.78828305664 0 87.420006 2.699990999999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf330 1.95011275736 0 87.5 2.5799999999999983 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 21 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 21 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf331 1.86470478744 0 88.459999 1.1400015000000039 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf332 1.915288657 0 87.899994 1.9800089999999884 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 33 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 33 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf333 1.89182096856 0 87.660004 2.3399939999999972 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf334 1.90129305101 0 87.32 2.8500000000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf335 1.83691289312 0 87.499992 2.5800119999999893 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 24 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 24 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf336 1.99270576133 0 87.240005 2.9699925000000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf337 1.89335056537 0 87.379997 2.7600044999999938 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf338 1.86322110696 0 87.68 2.309999999999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf339 1.90283800938 0 87.560005 2.4899924999999925 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf340 1.93028708168 0 87.319992 2.8500119999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf341 1.83547309207 0 88.080002 1.7099970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 29 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 29 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf342 2.02457820502 0 87.340004 2.8199940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf343 1.92499779889 0 87.319992 2.8500119999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 36 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 36 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf344 1.91086046154 0 87.580002 2.4599970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf345 1.8731571451 0 87.699997 2.280004500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 26 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 26 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf346 1.99270576133 0 87.900002 1.9799969999999973 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf347 1.915288657 0 87.660004 2.3399939999999972 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf348 2.02457820502 0 87.980003 1.8599955000000037 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf349 2.01384137222 0 87.860001 2.039998500000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf350 2.01384137222 0 88.080002 1.7099970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf351 1.89335056537 0 87.259995 2.940007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf352 1.95011275736 0 87.62001 2.399985000000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf353 1.82735666462 0 87.420006 2.699990999999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf354 1.90052151224 0 87.319992 2.8500119999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf355 1.81461237596 0 87.639999 2.3700014999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf356 1.99270576133 0 88.120003 1.6499955000000028 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf357 1.97200918641 0 87.259995 2.940007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf358 2.03543013873 0 87.759995 2.190007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf359 1.88168647759 0 87.680008 2.309987999999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf360 1.87490685722 0 87.280006 2.909990999999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 33 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 34 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 33 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 34 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf361 1.82735666462 0 88.259995 1.440007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 36 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 36 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf362 1.93187953917 0 87.259995 2.940007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf363 1.95173810837 0 87.800003 2.1299954999999926 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf364 1.78510221508 0 87.5 2.5799999999999983 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf365 1.85338992274 0 87.620003 2.399995500000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf366 1.8824427965 0 87.580002 2.4599970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf367 1.85412366139 0 87.759995 2.190007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 33 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 33 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf368 1.74714427646 0 87.319992 2.8500119999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 28 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 28 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf369 1.98230345381 0 87.240005 2.9699925000000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf370 1.90309574658 0 87.420006 2.699990999999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 33 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 33 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf371 1.85903013845 0 88.099991 1.680013499999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv fp16 1 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv fp16 1 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf372 1.83140588579 0 87.440002 2.669996999999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 33 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 33 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf373 1.98230345381 0 87.360001 2.789998500000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf374 1.915288657 0 87.539993 2.520010500000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 33 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 33 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf375 2.03543013873 0 87.840004 2.0699940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf376 1.99270576133 0 87.259995 2.940007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf377 1.78510221508 0 87.339996 2.8200059999999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf378 1.7844220778 0 87.560005 2.4899924999999925 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv perf 24 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv perf 24 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf379 1.93028708168 0 87.599998 2.4300029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf380 1.9356055111 0 87.699997 2.280004500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf381 1.93028708168 0 87.619995 2.400007499999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf382 1.82569453792 0 87.419998 2.700002999999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 24 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 24 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf383 1.8731571451 0 87.699997 2.280004500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf384 1.82735666462 0 88.019997 1.800004499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 33 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 33 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf385 2.03543013873 0 87.280006 2.909990999999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 25 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 25 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf386 1.95011275736 0 87.599998 2.4300029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf387 1.89616125983 0 87.879997 2.0100044999999938 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 33 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 33 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf388 1.98230345381 0 87.220001 2.999998500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf389 1.98230345381 0 87.259995 2.940007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf390 1.97200918641 0 87.259995 2.940007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf391 1.94095332873 0 87.780006 2.159990999999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 35 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 35 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf392 1.80204461771 0 87.319992 2.8500119999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf393 1.94175805117 0 87.32 2.8500000000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf394 1.915288657 0 87.580002 2.4599970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf395 1.77607613238 0 87.560005 2.4899924999999925 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv perf 24 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv perf 24 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf396 1.93187953917 0 87.360001 2.789998500000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf397 1.97117920006 0 87.380005 2.7599925000000027 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf398 1.88168647759 0 87.860008 2.039988000000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf399 1.97117920006 0 87.580002 2.4599970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf400 1.91685645364 0 87.580002 2.4599970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf401 2.03543013873 0 88.099998 1.6800029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf402 1.8731571451 0 88.060005 1.7399924999999925 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf403 1.7844220778 0 87.279999 2.910001499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 36 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv perf 24 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv fp16 1 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 36 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv perf 24 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv fp16 1 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf404 1.93028708168 0 87.599998 2.4300029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf405 2.03543013873 0 87.959999 1.8900015000000039 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf406 1.82167050509 0 87.480003 2.6099955000000037 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 33 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 33 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf407 1.83379617427 0 87.319992 2.8500119999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf408 1.83763364104 0 87.239998 2.9700029999999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv perf 24 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv perf 24 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf409 1.85338992274 0 87.62001 2.399985000000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf410 1.98230345381 0 87.479996 2.6100059999999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf411 1.97200918641 0 87.339996 2.8200059999999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf412 1.89335056537 0 87.32 2.8500000000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf413 1.91086046154 0 87.82 2.1000000000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf414 1.90052151224 0 87.62001 2.399985000000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 27 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 27 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf415 1.81227180478 0 87.260002 2.939996999999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 33 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 33 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf416 1.92499779889 0 87.240005 2.9699925000000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 33 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 33 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf417 1.84172858661 0 87.240005 2.9699925000000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 36 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv perf 24 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 36 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv perf 24 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf418 1.83691289312 0 87.459999 2.640001500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf419 1.82735666462 0 87.68 2.309999999999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf420 1.99270576133 0 87.340004 2.8199940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/data/autotuner_data/tuner_pareto_confs_batch220.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/data/autotuner_data/tuner_pareto_confs_batch220.txt index e6eaa5ceb5..d5b6d03628 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/data/autotuner_data/tuner_pareto_confs_batch220.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/data/autotuner_data/tuner_pareto_confs_batch220.txt @@ -19,1179 +19,1179 @@ conf1 1 0 89.22 0 ----- +++++ conf1 1.82735666462 0 89.259995 0.36000499999999536 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 28 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 28 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf2 1.8731571451 0 88.900009 0.47998650000000254 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf3 1.85485798124 0 88.659996 0.8400059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 28 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 28 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf4 1.8731571451 0 88.860001 0.5399985000000029 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf5 1.82735666462 0 88.779999 0.6600014999999928 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv samp 33 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv samp 33 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf6 1.83691289312 0 89.059998 0.5600020000000058 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 29 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 29 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf7 1.85485798124 0 88.580002 0.9599970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 25 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 28 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 25 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 28 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf8 1.84997347341 0 88.759995 0.690007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 35 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 35 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf9 1.84584182312 0 89.180008 0.43999199999999805 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf10 1.8731571451 0 88.900009 0.47998650000000254 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf11 1.84100462354 0 88.659996 0.8400059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 25 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 28 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 25 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 28 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf12 1.85485798124 0 88.660004 0.8399939999999972 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 28 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 28 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf13 1.85485798124 0 88.619995 0.9000074999999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 28 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 28 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf14 1.8731571451 0 88.940002 0.679997999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf15 1.85485798124 0 88.659996 0.8400059999999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 28 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 28 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf16 1.82735666462 0 88.599998 0.9300029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 26 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 26 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf17 2.03543013873 0 88.18 1.559999999999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf18 2.03543013873 0 88.240005 1.4699925000000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf19 2.03543013873 0 88.239998 1.4700029999999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf20 2.02457820502 0 88.300003 1.3799954999999926 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf21 2.02457820502 0 88.220001 1.4999985000000038 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf22 2.03543013873 0 88.160004 1.5899939999999972 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf23 2.03543013873 0 88.32 1.3500000000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf24 2.02457820502 0 88.300003 1.3799954999999926 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf25 2.03543013873 0 88.019997 1.800004499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf26 2.03543013873 0 88.019997 1.800004499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf27 2.03543013873 0 88.220001 1.4999985000000038 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf28 2.02457820502 0 88.32 1.3500000000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf29 2.03543013873 0 88.139999 1.6200014999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf30 2.03543013873 0 88.099991 1.680013499999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf31 2.02457820502 0 88.240005 1.4699925000000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf32 2.03543013873 0 88.219994 1.5000089999999986 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf33 2.02457820502 0 88.059998 1.7400030000000086 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf34 2.00236135985 0 89.080002 0.5399980000000056 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf35 2.02457820502 0 87.400002 2.7299969999999973 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf36 2.02457820502 0 87.340004 2.8199940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf37 2.03543013873 0 88.040001 1.7699984999999927 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf38 1.99270576133 0 87.979996 1.8600059999999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf39 2.03543013873 0 87.620003 2.399995500000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf40 2.00808495866 0 87.419998 2.700002999999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf41 2.03543013873 0 87.300003 2.8799954999999926 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf42 2.03543013873 0 87.340004 2.8199940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf43 2.03543013873 0 87.419998 2.700002999999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf44 1.99270576133 0 88.32 1.3500000000000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf45 2.01384137222 0 87.339996 2.8200059999999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 29 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 29 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf46 1.99270576133 0 87.599998 2.4300029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 25 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 25 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf47 1.99270576133 0 87.599998 2.4300029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf48 1.86470478744 0 88.459999 1.1400015000000039 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf49 1.99270576133 0 87.240005 2.9699925000000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf50 2.02457820502 0 87.340004 2.8199940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf51 1.99270576133 0 87.900002 1.9799969999999973 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf52 2.02457820502 0 87.980003 1.8599955000000037 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf53 2.01384137222 0 87.860001 2.039998500000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf54 2.01384137222 0 88.080002 1.7099970000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf55 1.99270576133 0 88.120003 1.6499955000000028 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf56 2.03543013873 0 87.759995 2.190007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf57 2.03543013873 0 87.840004 2.0699940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf58 1.99270576133 0 87.259995 2.940007499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf59 2.03543013873 0 87.280006 2.909990999999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 25 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 25 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf60 2.03543013873 0 88.099998 1.6800029999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf61 2.03543013873 0 87.959999 1.8900015000000039 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf62 1.99270576133 0 87.340004 2.8199940000000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/data/autotuner_data/tuner_promise_confs_batch220_multi.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/data/autotuner_data/tuner_promise_confs_batch220_multi.txt index 08022321e0..b4728be2a5 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/data/autotuner_data/tuner_promise_confs_batch220_multi.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/data/autotuner_data/tuner_promise_confs_batch220_multi.txt @@ -19,1950 +19,1950 @@ conf1 1 0 89.22 0 ----- +++++ conf1 2.22312589401 0 89.006667325 0.6133326750000038 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 5 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf2 2.79726854294 0 87.8775005 2.0137492500000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 7 8 promise swing_level 5 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 5 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf3 2.88604132716 0 87.965832775 1.881250837500005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 7 promise swing_level 7 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 5 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf4 3.39390401599 0 87.8625003 2.036249550000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 5 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 7 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf5 2.95120798947 0 87.954167275 1.8987490874999935 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf6 2.45726350874 0 87.860832275 2.0387515874999877 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 7 promise swing_level 5 8 promise swing_level 7 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf7 2.29810446067 0 88.0399999 1.7700001500000013 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf8 3.58520812283 0 87.97166725 1.8724991250000045 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf9 3.94997181369 0 87.56666705 2.4799994249999884 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 7 8 promise swing_level 3 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf10 3.11902161192 0 87.546666225 2.510000662500005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 7 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf11 4.89010044114 0 87.38999925 2.7450011249999946 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 8 promise swing_level 3 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf12 4.69428803602 0 87.3849995 2.7525007499999887 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 7 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf13 4.67686538966 0 87.499167 2.5812494999999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf14 4.52292512285 0 87.404999375 2.7225009374999942 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 7 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf15 3.86569311474 0 87.544999825 2.512500262499991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 3 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf16 4.69428803602 0 87.411666175 2.712500737500008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 7 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf17 3.62381381809 0 87.345833225 2.811250162499988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf18 3.24782542868 0 87.902499975 1.9762500375000016 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 5 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf19 2.90537724252 0 87.34583385 2.8112492249999903 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf20 4.33991036822 0 87.3708331 2.7737503500000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 7 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf21 2.92741170033 0 87.480833025 2.608750462500005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 5 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf22 3.06626887804 0 87.414999 2.7075015000000064 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf23 3.08522239286 0 87.938333575 1.922499637499996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 5 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf24 3.37235767581 0 87.734167675 2.228748487500006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 7 10 promise swing_level 5 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf25 3.22675411306 0 87.770000075 2.1749998875000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 5 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf26 3.53225531663 0 87.35833385 2.7924992250000074 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 5 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf27 2.25968451738 0 88.108333275 1.6675000874999881 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf28 4.02101952804 0 87.7124998 2.2612502999999933 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 8 promise swing_level 3 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf29 2.9881132992 0 87.8450002 2.0624996999999965 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv samp 32 add fp32 1 relu fp32 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 10 promise swing_level 5 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf30 4.22938970632 0 87.65166655 2.352500174999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf31 3.31996417301 0 87.87666745 2.0149988249999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 5 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf32 2.84588159936 0 88.363332625 1.285001062500001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 5 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf33 3.44027448615 0 87.991666575 1.8425001375000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 5 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf34 4.7351841274 0 87.418333975 2.7024990375000044 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf35 3.31808248998 0 87.29166815 2.8924977749999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 7 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf36 4.31094843819 0 87.4375006 2.6737490999999878 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 7 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 5 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf37 3.70919625868 0 87.374999825 2.7675002624999934 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 3 10 promise swing_level 5 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf38 3.94997181369 0 87.579167575 2.461248637499999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 7 8 promise swing_level 3 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf39 4.15589354787 0 87.682500075 2.306249887500009 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf40 4.02914883006 0 87.33500035 2.827499474999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf41 4.29008515477 0 87.584166775 2.4537498374999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 7 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf42 3.10283486565 0 87.5333332 2.5300001999999964 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 5 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf43 4.572231956 0 87.436668425 2.674997362500008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 8 promise swing_level 3 9 promise swing_level 7 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf44 3.26196341921 0 87.5649984 2.4825024000000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 5 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf45 4.39090652492 0 87.42333305 2.6950004250000035 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 7 8 promise swing_level 3 9 promise swing_level 6 10 promise swing_level 6 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf46 3.22371867403 0 87.670832 2.323751999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 8 promise swing_level 6 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf47 2.9881132992 0 87.416667 2.7049994999999925 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 5 8 promise swing_level 7 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf48 3.18549343688 0 87.382500625 2.7562490624999896 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv samp 32 add fp32 1 relu fp32 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 10 promise swing_level 5 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf49 3.46069601443 0 87.60833315 2.4175002749999877 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 5 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf50 3.31996417301 0 87.44749955 2.658750674999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 5 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 5 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf51 3.85323634209 0 87.60083245 2.4287513250000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf52 3.45869404191 0 87.67416745 2.318748825 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 5 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf53 3.49048578894 0 87.71000005 2.264999924999991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 5 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf54 4.64593590175 0 87.25333365 2.9499995249999955 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 7 8 promise swing_level 3 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf55 3.18549343688 0 87.410832825 2.7137507624999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf56 3.27450706009 0 87.619166725 2.401249912499999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 7 8 promise swing_level 3 9 promise swing_level 5 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf57 3.08048740791 0 87.799999025 2.1300014624999903 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 5 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf58 3.42647315487 0 87.74416595 2.2137510750000047 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 5 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf59 3.23027793555 0 87.489999675 2.5950004874999877 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 5 8 promise swing_level 7 9 promise swing_level 5 10 promise swing_level 5 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf60 3.63036816218 0 87.829167225 2.086249162499989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 7 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf61 3.4210918644 0 87.482499725 2.606250412500003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 5 10 promise swing_level 5 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf62 2.81904476363 0 87.59750055 2.4337491749999884 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 7 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf63 3.91442674653 0 87.61583295 2.4062505750000014 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 6 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf64 3.20815052809 0 88.079168075 1.7112478875000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf65 2.76237357142 0 87.8908336 1.9937496000000081 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 7 promise swing_level 7 8 promise swing_level 7 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf66 3.8889652023 0 88.084167075 1.7037493875000038 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 3 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf67 3.65477924348 0 87.61000065 2.414999024999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf68 3.72305465534 0 87.65916555 2.3412516750000023 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf69 4.22265290703 0 87.22583345 2.991249825000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 5 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf70 3.73701699657 0 87.9641672 1.8837491999999898 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 6 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf71 3.1223139408 0 87.448332225 2.657501662499996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf72 4.47552559764 0 87.395001625 2.737497562499989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf73 3.32916802495 0 87.91250015 1.9612497749999989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 6 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf74 3.89273633436 0 87.91333355 1.9599996749999917 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf75 2.77711551584 0 87.57166605 2.4725009249999914 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 6 8 promise swing_level 3 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 6 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf76 3.69544065016 0 87.679165225 2.31125216249999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 3 10 promise swing_level 6 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf77 3.70230567751 0 87.628333175 2.3875002375000065 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf78 4.28315376855 0 87.56833295 2.4775005750000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf79 4.09962813497 0 87.36166675 2.7874998750000017 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf80 3.63977274615 0 87.551666525 2.5025002124999958 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 6 10 promise swing_level 3 11 promise swing_level 7 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf81 2.84691955592 0 87.674167825 2.3187482625000015 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf82 3.35532459557 0 87.69666555 2.2850016749999895 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf83 4.05098853786 0 87.443332275 2.6650015874999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf84 2.74749178504 0 87.44499825 2.662502625000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf85 3.32688197478 0 87.661667525 2.3374987125000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf86 3.01032874903 0 87.339166325 2.821250512500008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 3 11 promise swing_level 5 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf87 4.0953860212 0 87.583332625 2.4550010625000027 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 6 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf88 3.44922680129 0 87.3699999 2.775000150000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf89 2.21984182444 0 88.9241673 0.6958327000000054 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 6 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf90 3.3666607999 0 87.8816672 2.007499200000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf91 3.959768091 0 87.686667775 2.299998337499993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 6 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf92 3.87758850268 0 87.5999997 2.4300004500000014 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf93 3.21424205348 0 87.4341656 2.6787515999999982 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf94 3.55482086232 0 87.467500825 2.6287487624999955 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf95 3.51717933382 0 87.55499955 2.4975006749999906 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 6 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf96 3.13170914898 0 88.15249975 1.6012503749999922 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf97 2.52632150587 0 87.70333305 2.2750004250000018 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf98 4.2353818578 0 87.541666025 2.5175009625000015 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf99 3.73294623828 0 87.849166975 2.056249537499994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf100 3.89273633436 0 87.619166125 2.4012508124999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf101 2.93640526336 0 87.28916585 2.896251224999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf102 2.82539388525 0 87.424999775 2.692500337499993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf103 4.46883521597 0 87.520833175 2.5487502374999877 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 3 @@ -1970,2108 +1970,2108 @@ conf103 4.46883521597 0 87.520833175 2.5487502374999877 11 promise swing_level 3 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf104 3.99682726015 0 87.56750025 2.478749625000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 6 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf105 4.11867598703 0 87.574166625 2.4687500624999927 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 5 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf106 3.17789222237 0 87.7116655 2.2625017500000055 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 7 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf107 2.66902126706 0 87.32749985 2.8387502250000054 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf108 3.17702992638 0 87.939999775 1.9200003374999923 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf109 3.79392955242 0 87.5916673 2.442499050000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 5 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf110 3.99682726015 0 87.552500025 2.5012499624999975 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf111 2.50358846207 0 88.07833385 1.7124992249999877 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 promise swing_level 3 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf112 3.3666607999 0 87.7483328 2.2075007999999983 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf113 4.91121042638 0 87.3341672 2.8287492000000043 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf114 3.27446890333 0 87.9450001 1.9124998499999961 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf115 2.91950398768 0 87.53499985 2.52750022499999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf116 2.94286314393 0 87.99083355 1.8437496749999909 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 7 promise swing_level 7 8 promise swing_level 7 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf117 2.79824347053 0 88.00916575 1.8162513750000073 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 5 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf118 3.40500767379 0 88.1741661 1.5687508500000078 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 7 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf119 2.54782996541 0 87.7858328 2.151250800000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 7 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 5 11 promise swing_level 7 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf120 2.44744012416 0 88.089999575 1.6950006375000086 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 7 promise swing_level 7 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf121 3.78912071371 0 87.9108349 1.963747650000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 3 8 promise swing_level 5 9 promise swing_level 7 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf122 2.80254123772 0 87.74583265 2.211251025000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 6 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf123 3.60628101133 0 87.845833725 2.06124941249999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 3 8 promise swing_level 3 -9 gpu conv samp 32 add fp32 1 relu fp32 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf124 3.42359268611 0 87.459999475 2.640000787499993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf125 3.11090718311 0 87.88999935 1.9950009750000035 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf126 3.06938366979 0 88.19083295 1.5437505749999971 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 7 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf127 3.22331191537 0 87.734999825 2.227500262499994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 3 10 promise swing_level 7 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf128 4.33910619993 0 87.847501325 2.058748012500004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 3 10 promise swing_level 7 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf129 3.33335410651 0 88.000832525 1.8287512124999878 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 3 10 promise swing_level 7 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf130 2.26805593313 0 87.304999625 2.872500562500008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 3 8 promise swing_level 5 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf131 4.18791473608 0 87.389168025 2.746247962499993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 6 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf132 2.80220587255 0 87.903334475 1.9749982875000072 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 3 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf133 4.17113055975 0 87.67583375 2.3162493750000053 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf134 3.93889835168 0 87.952500025 1.901249962499989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 7 10 promise swing_level 7 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf135 3.91824744464 0 87.4125023 2.7112465499999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf136 2.44571473308 0 87.342499925 2.8162501125000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf137 4.22119398129 0 87.338332975 2.822500537499998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 6 11 promise swing_level 7 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf138 4.90915126278 0 87.302499475 2.876250787499991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf139 4.21669669353 0 87.814167575 2.1087486374999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 3 10 promise swing_level 7 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf140 3.75108445622 0 87.828332875 2.087500687499997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 7 10 promise swing_level 7 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf141 4.76061397485 0 87.410831825 2.7137522624999946 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 3 10 promise swing_level 7 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf142 2.95449706541 0 87.53333275 2.530000874999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 5 8 promise swing_level 6 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf143 2.4437154746 0 87.961666675 1.887499987499993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 7 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf144 4.7155572369 0 87.380000675 2.7599989874999906 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf145 4.26178224546 0 87.615001225 2.407498162499998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 3 10 promise swing_level 7 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf146 4.59240711912 0 87.433332825 2.6800007625000077 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 6 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf147 3.61063669031 0 87.8116656 2.1125016000000016 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf148 3.58036603881 0 87.73916455 2.221253175000001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf149 3.36714489097 0 87.775000625 2.167499062499992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf150 3.24872658833 0 87.49916765 2.5812485249999924 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 3 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf151 4.39008334862 0 87.29583455 2.8862481750000057 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 11 promise swing_level 6 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf152 2.85269979653 0 87.4049998 2.7225003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf153 3.34828812808 0 87.664999775 2.332500337500001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf154 3.89781194593 0 87.642500125 2.3662498125000013 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 6 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf155 4.00217812894 0 87.63749985 2.373750225000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf156 2.7230507567 0 87.69833235 2.2825014749999966 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf157 3.3992411676 0 87.452500275 2.65124958749999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf158 3.03052200765 0 87.438333575 2.672499637499996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf159 2.67407559534 0 87.426667225 2.689999162499994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf160 2.89585605158 0 87.32416655 2.8437501749999967 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf161 3.37239814804 0 87.42833305 2.687500424999989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 6 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf162 3.16529194069 0 87.500833125 2.5787503124999986 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 7 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf163 3.46825620155 0 87.411667125 2.7124993125000074 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf164 2.74905073367 0 87.44833355 2.657499674999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 5 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 5 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf165 3.37239814804 0 87.689168225 2.2962476624999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf166 2.9629705972 0 87.37750055 2.763749175000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf167 2.70309096415 0 87.4308331 2.683750349999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf168 4.01292296369 0 87.66166665 2.337500024999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf169 3.84269789692 0 87.470835375 2.623746937500009 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf170 3.959768091 0 87.613332825 2.4100007624999975 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 5 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf171 3.07253839114 0 87.65166645 2.3525003250000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 7 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf172 4.40950995119 0 87.4466671 2.6599993499999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf173 3.06784222957 0 87.36250025 2.7862496250000035 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf174 4.0675243274 0 87.580000625 2.459999062500003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf175 3.49869949155 0 87.42666675 2.689999875000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf176 3.31440369918 0 87.89083325 1.9937501249999983 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 6 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf177 2.98957564167 0 87.97583395 1.866249075000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 6 8 promise swing_level 3 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf178 2.49571716644 0 88.3791682 1.2612477000000055 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 5 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf179 3.63257386824 0 87.67166705 2.3224994250000037 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf180 2.29373373706 0 87.3775007 2.76374895 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf181 3.26200128507 0 87.447501525 2.658747712499988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 6 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf182 4.27091529853 0 87.54749935 2.508750975000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 8 promise swing_level 7 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf183 3.46419437211 0 87.65166645 2.3525003250000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv samp 32 add fp32 1 relu fp32 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf184 3.67278797036 0 87.37750015 2.7637497749999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 7 9 promise swing_level 6 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf185 3.58255705929 0 87.415832825 2.706250762500005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf186 2.54027539556 0 88.04916635 1.7562504750000087 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 5 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf187 2.9367735215 0 87.389999625 2.745000562499996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf188 2.71933521405 0 87.917501075 1.9537483874999921 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf189 4.02441715614 0 87.6583342 2.3424987 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf190 2.71936152972 0 87.399165525 2.731251712499997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 5 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf191 3.93371524437 0 87.821667275 2.0974990875000046 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf192 2.90285613143 0 87.91499975 1.957500374999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 5 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf193 2.58393260167 0 87.37750015 2.7637497749999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf194 3.32365532462 0 87.42499945 2.692500824999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf195 3.38002422456 0 87.445831475 2.66125278749999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf196 3.45024320761 0 87.4049996 2.7225006000000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 6 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf197 3.57399084262 0 87.33999965 2.8200005250000046 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf198 3.69087809171 0 87.3116667 2.862499949999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf199 3.35348266569 0 87.295833425 2.8862498625000015 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 7 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 6 11 promise swing_level 5 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf200 2.47636288822 0 87.57249915 2.471251275 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 29 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 29 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 3 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf201 2.91387309429 0 87.4475012 2.658748199999991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf202 2.22354808258 0 88.58666625 0.9500006250000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf203 3.58036603881 0 87.590833 2.443750499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf204 2.43871043482 0 88.8666663 0.5300005499999898 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 5 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf205 3.59976716148 0 88.08083365 1.7087495249999947 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 5 11 promise swing_level 7 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf206 3.35720865842 0 88.0383344 1.7724984000000035 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 5 11 promise swing_level 7 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf207 2.64856913689 0 88.45666565 1.1450015249999907 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 7 9 promise swing_level 7 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf208 3.72305465534 0 88.078334425 1.712498362500007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 3 11 promise swing_level 5 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf209 2.62041316054 0 88.210833625 1.5137495624999886 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf210 3.23247029166 0 87.28916645 2.896250325000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 3 11 promise swing_level 7 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf211 3.41718124763 0 87.4116671 2.7124993499999945 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 5 8 promise swing_level 3 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 5 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf212 3.07971090716 0 87.445834025 2.6612489625000038 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 6 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf213 2.20591850054 0 87.40166585 2.7275012249999975 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 23 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 23 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf214 2.6053018248 0 87.883333075 2.0050003875000044 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 5 8 promise swing_level 3 9 promise swing_level 3 @@ -4079,512 +4079,512 @@ conf214 2.6053018248 0 87.883333075 2.0050003875000044 11 promise swing_level 7 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf215 3.28346025846 0 87.76500005 2.182499925000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 5 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 7 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf216 3.11135496304 0 87.321666825 2.847499762500007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 5 8 promise swing_level 6 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf217 3.21854939467 0 87.339166525 2.8212502125000043 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 5 8 promise swing_level 6 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf218 2.55547608094 0 87.26916725 2.9262491250000053 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 7 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf219 3.11135496304 0 87.56416635 2.483750475000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 5 8 promise swing_level 6 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf220 2.87220643768 0 87.521666525 2.5475002124999975 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf221 2.77618267928 0 87.439166325 2.671250512499995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 7 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf222 3.05334175057 0 87.425834175 2.69124873749999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf223 2.33585182653 0 88.35333315 1.3000002750000021 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 5 8 promise swing_level 6 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf224 2.21716206658 0 87.3116662 2.862500699999991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 5 8 promise swing_level 6 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf225 2.69120853177 0 87.656667675 2.344998487500007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf226 3.1703271975 0 87.6783335 2.3124997500000077 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 6 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf227 2.97212144641 0 87.367499325 2.7787510125000026 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 7 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf228 2.25144682528 0 87.3958334 2.736249899999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 7 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf229 2.86912722694 0 87.202500275 3.02624958749999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 5 8 promise swing_level 6 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf230 2.87079797879 0 87.47999995 2.610000074999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf231 2.56975480404 0 87.446666425 2.660000362499993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf232 2.69696840237 0 87.287500325 2.898749512500004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf233 2.88189757495 0 87.3600014 2.789997899999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf234 2.43871043482 0 88.846666575 0.5600001374999977 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 5 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf235 3.05254571314 0 88.251667425 1.4524988625000077 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 5 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf236 2.98398831599 0 88.01166725 1.812499124999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 5 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf237 3.16568418685 0 87.905833425 1.9712498625000023 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf238 2.62041316054 0 88.217499575 1.5037506374999907 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf239 3.3002347944 0 87.99583305 1.8362504249999958 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 7 8 promise swing_level 7 9 promise swing_level 5 10 promise swing_level 5 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf240 4.18941320219 0 87.5874997 2.4487504500000057 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 5 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf241 4.0675243274 0 87.470832425 2.623751362500002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 6 8 promise swing_level 5 9 promise swing_level 5 @@ -4592,626 +4592,626 @@ conf241 4.0675243274 0 87.470832425 2.623751362500002 11 promise swing_level 5 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf242 3.32224072274 0 87.538332275 2.5225015874999954 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 6 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf243 4.17546908121 0 87.220834375 2.998748437500005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 5 11 promise swing_level 6 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf244 3.00691426182 0 87.72833195 2.237502075000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 6 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf245 4.0953860212 0 87.402499325 2.7262510125000077 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 6 8 promise swing_level 3 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf246 2.88189757495 0 87.379998625 2.7600020625 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf247 2.54462300687 0 87.7791658 2.1612512999999964 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf248 3.26693134436 0 87.919166625 1.9512500624999944 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 11 promise swing_level 5 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf249 3.10006219208 0 87.460834125 2.6387488124999905 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 6 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 7 11 promise swing_level 6 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf250 2.92272273741 0 87.498334275 2.582498587499991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 6 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 7 11 promise swing_level 5 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf251 2.43871043482 0 88.83583265 0.5762510249999977 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 5 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf252 2.83909591662 0 87.860833225 2.0387501625000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 7 promise swing_level 7 8 promise swing_level 5 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 5 11 promise swing_level 5 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf253 2.62041316054 0 88.179165975 1.5612510374999928 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf254 2.81041738465 0 88.41333255 1.210001174999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 3 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf255 3.96177786409 0 87.65249905 2.351251424999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 6 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf256 3.45622671318 0 87.531667525 2.5324987124999936 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 6 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf257 4.73893731565 0 87.360832475 2.7887512875000056 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf258 4.73893731565 0 87.286666275 2.9000005874999957 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 25 add fp32 1 relu fp32 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf259 4.27169438746 0 87.3650001 2.7824998499999936 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 5 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 6 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf260 3.87887308869 0 87.9549999 1.897500149999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 5 8 promise swing_level 5 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf261 4.41852349351 0 87.749167875 2.2062481875000017 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf262 3.37191254539 0 87.934166975 1.9287495375000034 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 6 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf263 3.16486414663 0 87.485832525 2.6012512124999887 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf264 3.59226623023 0 87.574167175 2.468749237499999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf265 3.45822582959 0 87.5208324 2.5487513999999933 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 6 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf266 3.36383970725 0 87.486666775 2.5999998374999933 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf267 3.82189695779 0 87.43416595 2.678751075000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 5 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf268 3.90420222535 0 87.695832675 2.28625098749999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 7 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf269 3.22331191537 0 87.90000085 1.9799987250000015 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 6 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf270 2.28385340025 0 87.23833315 2.9725002749999945 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 promise swing_level 7 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 7 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf271 2.56726621541 0 87.99666665 1.8350000250000065 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 6 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf272 3.73354140496 0 87.47749935 2.613750974999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf273 3.6204988657 0 88.1550005 1.5974992499999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 5 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf274 4.02372564807 0 87.5900007 2.4449989499999916 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 5 8 promise swing_level 5 9 promise swing_level 3 @@ -5219,2268 +5219,2268 @@ conf274 4.02372564807 0 87.5900007 2.4449989499999916 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf275 4.62353838059 0 87.5825008 2.4562487999999902 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf276 3.51050063574 0 87.59416715 2.438749274999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf277 3.70577219978 0 87.614999375 2.4075009375000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf278 4.59420908695 0 87.56083335 2.4887499750000046 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 5 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf279 2.88189757495 0 87.353333075 2.800000387500006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf280 3.56121805284 0 87.5758337 2.4662494499999923 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 6 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf281 3.58255705929 0 87.561666325 2.4875005124999916 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 6 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf282 3.11580531377 0 87.601667 2.427499499999989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 6 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf283 3.06039148759 0 87.4933328 2.5900007999999914 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 7 -9 gpu conv samp 32 add fp32 1 relu fp32 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf284 3.38002422456 0 87.65249995 2.3512500749999887 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 6 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf285 3.64255641277 0 87.574165675 2.4687514874999934 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf286 3.56121805284 0 87.6333336 2.379999599999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 6 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf287 3.18552954817 0 87.66083365 2.338749524999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 6 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf288 3.82189695779 0 87.3058323 2.8712515499999895 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 5 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf289 2.55109133649 0 88.824998875 0.5925016874999898 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 5 8 promise swing_level 6 9 promise swing_level 7 10 promise swing_level 7 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf290 2.35515489426 0 88.702499575 0.7762506374999916 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 6 8 promise swing_level 3 9 promise swing_level 7 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf291 2.66035423751 0 88.725834075 0.7412488875000065 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 7 10 promise swing_level 7 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf292 2.46892186955 0 88.620833925 0.8987491124999991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 5 8 promise swing_level 5 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf293 2.43871043482 0 88.8566664 0.5450004000000064 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 5 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf294 2.58640601032 0 88.735001425 0.7274978625000088 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 5 8 promise swing_level 3 9 promise swing_level 7 10 promise swing_level 7 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf295 2.40247335088 0 88.687499975 0.7987500375000067 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 7 10 promise swing_level 6 -11 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf296 2.81041738465 0 88.028332525 1.787501212500004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 7 promise swing_level 3 8 promise swing_level 3 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf297 3.90290081063 0 88.0141664 1.808750400000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf298 3.04787736289 0 87.907500275 1.9687495874999925 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 7 promise swing_level 3 -8 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf299 3.23637931184 0 88.330000375 1.3349994375000023 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 7 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf300 2.93456535715 0 87.989167825 1.846248262500005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 5 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf301 3.16736124336 0 88.041667175 1.7674992374999974 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 5 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 7 11 promise swing_level 6 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf302 2.78424216484 0 88.337501375 1.323747937499995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 7 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf303 3.80420184699 0 87.9891664 1.8462503999999953 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 5 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf304 3.23637931184 0 88.4875002 1.098749699999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 7 8 promise swing_level 5 9 promise swing_level 5 10 promise swing_level 7 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf305 2.62041316054 0 88.203333325 1.5250000124999943 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf306 3.23593208824 0 88.0941658 1.6887512999999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 5 9 promise swing_level 7 10 promise swing_level 7 11 promise swing_level 6 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf307 3.42159173642 0 88.443335225 1.1649971625000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 5 9 promise swing_level 5 10 promise swing_level 7 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf308 3.3002347944 0 88.093334025 1.6899989624999918 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 5 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 7 11 promise swing_level 3 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf309 3.62872735439 0 87.960833225 1.8887501624999956 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 7 11 promise swing_level 6 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf310 3.4527019977 0 88.395832675 1.2362509875000072 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 5 9 promise swing_level 5 10 promise swing_level 7 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf311 3.42159173642 0 88.39416615 1.2387507749999926 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 5 9 promise swing_level 5 10 promise swing_level 7 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf312 4.2916576411 0 87.390833475 2.7437497875000076 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf313 3.66154160089 0 87.591667 2.4424994999999967 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 6 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf314 3.57662923878 0 87.331667275 2.832499087499997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf315 3.31073305677 0 87.804167125 2.1237493124999887 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf316 3.75639965063 0 87.25750005 2.9437499249999917 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf317 3.3747472006 0 87.392500825 2.7412487624999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf318 3.66823325689 0 87.430834 2.6837489999999917 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf319 2.82170564914 0 87.6274997 2.3887504499999963 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf320 3.43338092572 0 87.43666615 2.675000775000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf321 2.92844895413 0 87.87833435 2.012498474999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 7 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf322 3.4527019977 0 87.357498925 2.793751612499996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf323 3.23198697458 0 87.3575007 2.793748949999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf324 3.80245163432 0 87.6775002 2.3137497000000025 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf325 2.15268737669 0 87.3516675 2.802498749999991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 5 8 promise swing_level 7 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf326 3.8889652023 0 87.38416695 2.7537495750000076 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf327 3.66431089406 0 87.38166555 2.7575016750000074 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf328 2.88189757495 0 87.393332725 2.740000912500008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf329 2.39980610692 0 88.320000325 1.3499995125000055 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf330 4.15952302959 0 87.3941673 2.7387490499999885 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf331 2.77909300529 0 87.6575016 2.3437475999999933 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 5 11 promise swing_level 7 12 promise swing_level 6 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf332 3.63257386824 0 87.580834025 2.458748962499996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf333 2.67077160752 0 87.36833175 2.7775023750000045 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf334 2.3107659614 0 88.750832 0.7037519999999944 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 5 -8 gpu conv perf 28 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 28 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf335 2.43871043482 0 88.830000375 0.5849994375000023 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 5 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf336 2.25669026527 0 88.19916785 1.531248225000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf337 2.62041316054 0 88.201666775 1.5274998374999882 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf338 3.44871882267 0 87.9841679 1.8537481499999942 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 5 8 promise swing_level 5 9 promise swing_level 5 10 promise swing_level 5 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf339 3.12759614858 0 88.134999425 1.6275008624999927 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 5 10 promise swing_level 7 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf340 2.30252960566 0 88.02416645 1.7937503250000049 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 7 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf341 3.43880097701 0 87.575833525 2.4662497125000087 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf342 2.91420549975 0 87.312500225 2.8612496625000077 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf343 4.33676241041 0 87.852499375 2.051250937500008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf344 3.47275668824 0 87.641666 2.367500999999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf345 2.71196063726 0 87.354999175 2.7975012374999935 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 7 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf346 4.32734587799 0 87.410000375 2.714999437500005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf347 3.1944378689 0 87.351667025 2.802499462500002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf348 2.96037977555 0 87.29999945 2.880000824999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf349 2.88782087104 0 87.430834275 2.6837485875000056 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf350 2.88885995212 0 87.5633341 2.4849988499999895 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf351 2.75314466005 0 87.907500075 1.968749887499996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 5 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf352 2.47507600395 0 87.9208338 1.948749300000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf353 2.74937349567 0 87.3749995 2.7675007499999964 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf354 3.09521339508 0 87.6141666 2.408750099999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 3 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf355 2.99748429973 0 87.36499935 2.7825009749999907 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv samp 32 add fp32 1 relu fp32 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf356 2.88189757495 0 87.56749895 2.478751574999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf357 2.90288611892 0 87.2866667 2.8999999500000015 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf358 4.40060202139 0 87.384166725 2.7537499124999982 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf359 5.0608679523 0 87.2600004 2.939999400000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 11 promise swing_level 6 12 promise swing_level 6 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf360 3.13335039202 0 87.685000575 2.302499137499993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf361 2.88189757495 0 87.39916595 2.731251075000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf362 2.41436297924 0 87.5375009 2.5237486500000017 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf363 2.8243431753 0 87.335833375 2.8262499374999877 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 5 11 promise swing_level 7 12 promise swing_level 6 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf364 2.69363345703 0 87.7666664 2.18000039999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf365 2.93855473873 0 87.74083295 2.2187505750000014 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf366 4.10034598028 0 87.511667025 2.562499462500007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf367 2.79532072438 0 87.648334325 2.3574985125000083 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf368 2.99748429973 0 87.29499945 2.8875008249999894 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf369 3.3747472006 0 87.335000875 2.8274986874999897 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf370 2.43871043482 0 88.876667225 0.5149991624999899 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 5 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf371 3.49304571139 0 87.886666475 2.0000002875000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf372 2.62041316054 0 88.18583415 1.5512487749999906 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf373 3.21895495215 0 88.235000825 1.477498762499998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf374 2.89129730127 0 87.355000075 2.7974998874999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf375 3.40893181539 0 87.679167025 2.311249462500001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 7 8 promise swing_level 3 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf376 3.43040508008 0 87.5433332 2.5150001999999887 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 7 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 6 8 promise swing_level 3 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf377 3.32365532462 0 87.650000725 2.354998912500001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf378 2.85673090848 0 87.6350016 2.377497600000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 7 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf379 3.66943077008 0 87.448333725 2.6574994125000018 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf380 3.61281847778 0 87.5375004 2.5237494 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf381 2.91100547222 0 87.31250005 2.8612499250000027 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf382 2.5775096386 0 88.148333525 1.607499712500001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf383 2.99444983413 0 87.32000015 2.8499997750000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf384 2.94400390535 0 87.27166645 2.9225003250000015 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 3 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf385 2.72772934611 0 88.04250105 1.7662484250000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 6 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf386 2.88189757495 0 87.357499525 2.7937507125000067 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf387 3.20599102562 0 88.057500625 1.7437490624999938 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 3 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf388 3.95787184337 0 87.305834375 2.871248437499993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 6 8 promise swing_level 3 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf389 2.85571480751 0 87.63000045 2.3849993250000026 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 3 10 promise swing_level 6 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf390 2.09444472403 0 88.0391668 1.7712497999999925 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 7 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf391 3.38291326538 0 87.556666775 2.4949998375000035 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 6 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf392 2.59992645158 0 87.334166375 2.8287504375000054 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf393 3.10125965268 0 87.31916695 2.851249575000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 7 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/data/autotuner_data/tuner_promise_confs_batch220_single.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/data/autotuner_data/tuner_promise_confs_batch220_single.txt index 0f4b80d812..c31986ee32 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/data/autotuner_data/tuner_promise_confs_batch220_single.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/data/autotuner_data/tuner_promise_confs_batch220_single.txt @@ -19,1151 +19,1151 @@ conf1 1 0 89.22 0 ----- +++++ conf1 2.67672467719 0 88.795999525 0.6360007124999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 6 7 promise swing_level 5 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf2 3.33800665782 0 88.866998825 0.5295017625000042 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 4 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 4 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf3 3.22938698033 0 88.9155001 0.45674984999999424 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 5 7 promise swing_level 4 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf4 3.21549257066 0 88.928999525 0.6910004750000042 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf5 2.90359100342 0 88.988999775 0.6310002250000025 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 4 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 4 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf6 3.13172659997 0 88.68449835 0.8032524750000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 3 -9 gpu conv perf 29 add fp32 1 relu fp32 1 +9 gpu conv perf 29 add fp16 1 relu fp16 1 10 promise swing_level 4 -11 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 12 promise swing_level 4 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf7 3.24295282247 0 88.95999925 0.6600007500000032 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 3 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 4 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf8 2.97804305951 0 88.821499125 0.5977513124999945 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 5 7 promise swing_level 4 8 promise swing_level 7 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf9 3.26647563775 0 88.669999725 0.8250004125000032 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 7 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 5 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf10 3.1477761713 0 88.998499625 0.6215003750000051 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 5 7 promise swing_level 5 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf11 3.32593694107 0 88.800000975 0.6299985374999935 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 6 5 promise swing_level 6 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 4 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 4 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf12 2.91986801853 0 88.790999025 0.6435014624999909 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 5 7 promise swing_level 5 8 promise swing_level 7 9 promise swing_level 6 10 promise swing_level 4 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf13 3.02627918929 0 88.991499875 0.6285001249999965 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 5 promise swing_level 6 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 4 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf14 3.28843620388 0 88.86400015 0.5339997750000052 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 4 -11 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 12 promise swing_level 4 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf15 3.44523051586 0 88.7865004 0.650249400000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 4 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 4 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 4 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf16 2.95449706541 0 88.74249965 0.7162505249999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 7 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf17 3.17115007156 0 88.9384999 0.6815001000000024 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 4 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf18 2.94721511168 0 88.76149905 0.6877514250000019 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 5 7 promise swing_level 5 8 promise swing_level 7 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf19 2.84317494425 0 88.622499625 0.8962505624999935 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 6 -8 gpu conv perf 28 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +8 gpu conv perf 28 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 promise swing_level 3 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf20 3.12801392627 0 88.6580003 0.8429995500000018 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 7 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf21 3.20643001091 0 88.8189993 0.601501049999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 3 7 promise swing_level 7 8 promise swing_level 4 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 4 -11 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf22 2.97804305951 0 88.983499975 0.636500024999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 5 promise swing_level 6 6 promise swing_level 6 7 promise swing_level 5 8 promise swing_level 3 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 4 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf23 2.45477347176 0 88.96349945 0.6565005499999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 6 5 promise swing_level 6 6 promise swing_level 3 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf24 3.46470692025 0 88.749 0.7065000000000055 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 4 5 promise swing_level 6 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 3 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 4 -11 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf25 3.00049288293 0 88.61949895 0.9007515750000081 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 6 5 promise swing_level 6 6 promise swing_level 3 -7 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf26 3.13251209559 0 88.65699895 0.8445015749999953 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 3 7 promise swing_level 7 8 promise swing_level 4 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 4 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf27 2.89866398854 0 88.8870002 0.4994996999999941 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 7 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf28 3.18762540477 0 88.651999125 0.8520013124999934 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 4 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf29 3.10324605397 0 88.752499425 0.7012508625000038 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 5 7 promise swing_level 7 8 promise swing_level 7 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 7 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf30 3.10163618852 0 88.9185004 0.45224939999999947 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 4 8 promise swing_level 7 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf31 4.02510890193 0 88.048000175 1.7579997375000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 6 8 promise swing_level 7 9 promise swing_level 5 10 promise swing_level 6 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf32 4.53841128362 0 88.008499275 1.81725108749999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf33 3.7422923712 0 88.235000775 1.4774988374999936 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 7 promise swing_level 7 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf34 4.46801808483 0 87.952000425 1.901999362500007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 5 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf35 5.38932162058 0 87.9340008 1.928998799999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf36 2.79824347053 0 88.20149995 1.5277500750000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 5 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 7 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf37 3.30570890052 0 88.429000025 1.186499962500008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 3 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 4 11 promise swing_level 7 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf38 2.58779938472 0 87.88850105 1.997248424999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 12 promise swing_level 4 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf39 2.95412434924 0 87.957499275 1.893751087499993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 7 promise swing_level 7 8 promise swing_level 5 9 promise swing_level 4 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf40 5.14688853889 0 87.89050085 1.9942487250000056 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf41 3.84148967029 0 87.90000095 1.979998574999989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 4 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf42 4.87614175127 0 88.026999725 1.7895004125000042 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 5 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf43 5.27401221354 0 87.9860003 1.8509995499999974 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf44 4.41605844773 0 87.987500025 1.8487499624999941 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 4 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf45 4.73989653012 0 88.117500425 1.653749362499994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 7 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf46 2.67313439765 0 88.13850065 1.6222490250000021 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 7 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf47 5.14688853889 0 87.87649975 2.015250375000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf48 3.22545779989 0 88.78299925 0.6555011249999936 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 7 promise swing_level 5 8 promise swing_level 3 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 4 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf49 3.90610167834 0 87.907000425 1.9694993624999881 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 7 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf50 4.18791473608 0 88.030001075 1.7849983874999964 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 6 8 promise swing_level 7 9 promise swing_level 5 10 promise swing_level 6 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf51 3.1944378689 0 87.805000325 2.1224995125000063 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 5 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf52 4.44807369936 0 87.99100075 1.8434988750000016 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 6 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf53 5.55581088584 0 87.87150075 2.022748875000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf54 3.64253280448 0 88.276000225 1.415999662499992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 7 8 promise swing_level 4 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 4 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf55 4.64593590175 0 87.996999875 1.834500187499998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 5 8 promise swing_level 4 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf56 5.20176711133 0 88.074500025 1.7182499624999892 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf57 3.9663633052 0 88.0735002 1.7197497000000013 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 6 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf58 4.60837357682 0 87.999500025 1.8307499624999934 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf59 4.06340711578 0 88.002000275 1.8269995874999978 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 8 promise swing_level 6 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf60 5.09426380021 0 87.855000525 2.047499212500007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 7 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf61 4.23911959499 0 88.007501175 1.818748237499996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 3 @@ -1171,2603 +1171,2603 @@ conf61 4.23911959499 0 88.007501175 1.818748237499996 10 promise swing_level 4 11 promise swing_level 4 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf62 4.5453117753 0 88.0409992 1.7685011999999958 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf63 3.45219299503 0 88.565001325 0.9824980125000025 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 7 promise swing_level 3 -8 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf64 3.86383274875 0 87.97849965 1.8622505249999932 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf65 3.44027448615 0 87.9340002 1.9289996999999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf66 4.07584294896 0 87.9475008 1.908748799999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 5 8 promise swing_level 5 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf67 3.63588745737 0 88.243500675 1.4647489874999877 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 7 8 promise swing_level 7 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 4 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf68 5.05122495077 0 87.925000225 1.9424996624999906 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 3 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf69 3.05020975179 0 88.57749975 0.9637503749999965 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf70 4.17038771832 0 88.218001225 1.5029981625000062 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf71 3.7658637414 0 88.2555001 1.4467498499999891 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf72 3.52912784592 0 87.86699945 2.0295008250000066 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 7 promise swing_level 6 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 7 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf73 3.65088558479 0 88.315000225 1.3574996624999898 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 6 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 4 11 promise swing_level 5 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf74 4.33442115154 0 88.005999775 1.8210003374999886 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 7 9 promise swing_level 5 10 promise swing_level 6 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf75 3.68236565425 0 88.5015 1.0777500000000089 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 7 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf76 3.06506484873 0 87.92800145 1.9379978250000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf77 4.58360534701 0 87.95550005 1.8967499250000017 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 5 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf78 3.75050871209 0 87.8379992 2.0730012 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 7 promise swing_level 6 -8 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf79 3.4348497926 0 88.467500925 1.1287486125000044 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 6 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 4 11 promise swing_level 7 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf80 5.28800605153 0 87.9869996 1.8495005999999918 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf81 4.39173000998 0 87.98149965 1.857750524999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 6 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf82 5.3783876796 0 87.84850185 2.0572472249999905 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf83 3.162335396 0 88.293000575 1.390499137500008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 7 9 promise swing_level 5 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf84 5.09426380021 0 87.883499725 2.004750412500009 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 7 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf85 2.74020437986 0 87.999500575 1.8307491374999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 7 promise swing_level 5 -8 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf86 5.51735857539 0 87.97999975 1.8600003749999914 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf87 5.19158023592 0 87.911000075 1.9634998874999923 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf88 4.1250848507 0 88.0879993 1.6980010499999878 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 6 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf89 2.66123604862 0 88.5129999 1.0605001500000029 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf90 4.97556018173 0 87.916000775 1.9559988374999975 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf91 3.34499993327 0 87.9115002 1.9627496999999892 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 3 10 promise swing_level 7 11 promise swing_level 4 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf92 3.09000593446 0 87.896501125 1.9852483124999978 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf93 2.83304211589 0 87.880998975 2.0085015375000026 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 7 promise swing_level 5 -8 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf94 2.63194848111 0 88.75999965 0.6900005250000021 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf95 3.83101529319 0 87.98750035 1.848749474999991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 6 8 promise swing_level 7 9 promise swing_level 7 10 promise swing_level 6 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf96 3.84827608282 0 88.2220012 1.4969982000000073 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 5 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf97 2.78877373583 0 88.42150035 1.1977494749999948 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf98 3.53383224157 0 88.07199995 1.7220000749999897 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 5 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf99 4.19968149048 0 88.216000175 1.505999737499991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf100 4.65047225574 0 87.902000775 1.9769988374999912 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 6 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf101 4.78444410626 0 87.889500025 1.9957499624999926 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 7 9 promise swing_level 5 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf102 4.57312486503 0 87.9970007 1.8344989499999969 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf103 3.8889652023 0 88.073999225 1.7190011625000068 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf104 5.21876975148 0 87.87750095 2.013748575000001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf105 2.64826960752 0 88.22749975 1.488750374999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 7 promise swing_level 5 -8 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf106 4.11867598703 0 88.0099998 1.8150002999999941 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 6 8 promise swing_level 7 9 promise swing_level 5 10 promise swing_level 6 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf107 3.17702992638 0 88.022500825 1.7962487625000065 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 7 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf108 4.37886412672 0 87.893499925 1.9897501124999977 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf109 3.92920493734 0 88.0840001 1.7039998500000024 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 6 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 6 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf110 4.70261524955 0 88.0150007 1.8074989499999958 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 7 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf111 4.64416988913 0 88.018500125 1.8022498124999942 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 5 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf112 4.90315521962 0 88.029999875 1.7850001874999961 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf113 4.06270214483 0 87.874499575 2.018250637499996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf114 4.30863496096 0 88.025001125 1.792498312499994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 6 8 promise swing_level 7 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf115 3.95781609888 0 88.2520015 1.4519977499999897 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf116 2.77744490196 0 88.174500725 1.568248912499996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 7 promise swing_level 5 8 promise swing_level 7 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf117 4.77014987409 0 87.975501075 1.866748387500003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 3 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf118 3.8889652023 0 87.9205002 1.9492496999999886 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf119 2.79824347053 0 88.446000075 1.1609998874999974 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 5 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 7 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf120 3.83776471539 0 88.132000175 1.6319997374999957 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 7 8 promise swing_level 4 9 promise swing_level 5 10 promise swing_level 4 11 promise swing_level 7 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf121 4.19454106212 0 87.97800115 1.8629982750000025 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 6 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 6 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf122 3.59382826036 0 88.37350045 1.2697493250000065 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf123 5.36872859298 0 87.894000225 1.9889996624999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 5 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf124 4.967292681 0 87.940499875 1.9192501874999977 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf125 5.59870437268 0 87.8770005 2.014499250000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf126 5.46690922425 0 87.925 1.9425000000000026 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf127 4.5513485126 0 87.83949905 2.0707514249999974 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf128 2.93815531254 0 88.154000825 1.5989987625000026 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 7 promise swing_level 3 8 promise swing_level 4 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf129 4.25423335575 0 88.0705007 1.7242489500000033 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf130 3.1598284487 0 88.1879997 1.5480004499999893 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 7 promise swing_level 6 8 promise swing_level 7 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf131 3.92275999643 0 87.850001125 2.0549983124999898 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 7 promise swing_level 4 8 promise swing_level 7 9 promise swing_level 5 10 promise swing_level 6 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf132 5.09426380021 0 87.838500025 2.0722499624999955 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf133 3.01559448556 0 88.365500375 1.2817494375000038 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 6 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 7 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf134 3.92404729263 0 88.058001 1.7429984999999917 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 6 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf135 4.13577014781 0 87.9234998 1.9447502999999955 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 7 8 promise swing_level 6 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf136 5.2833331949 0 87.856000525 2.0459992125 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf137 3.8984608492 0 88.206499825 1.5202502625000065 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 7 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf138 4.93553563631 0 88.05350055 1.7497491750000052 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf139 4.75497508116 0 87.992001125 1.841998312499996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf140 3.91377251774 0 88.252000375 1.4519994375000067 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf141 3.28753211331 0 88.596500575 0.9352491375000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 5 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf142 3.83101529319 0 87.9215 1.9477500000000063 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 7 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 5 11 promise swing_level 7 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf143 5.08343547558 0 87.985002125 1.8524968125000072 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf144 2.49103797578 0 88.1110008 1.6634987999999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf145 2.87148736415 0 88.006000425 1.8209993625000038 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 7 promise swing_level 6 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf146 4.79309454233 0 87.953999325 1.8990010125000012 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf147 4.96422137146 0 87.966501075 1.8802483875000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 7 promise swing_level 3 8 promise swing_level 4 9 promise swing_level 7 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf148 4.5862610469 0 87.8730011 2.020498350000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf149 5.01731395935 0 87.9515011 1.902748349999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 4 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf150 3.78073451439 0 88.221500575 1.4977491375000085 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 7 8 promise swing_level 4 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 4 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf151 5.38932162058 0 87.9480001 1.907999849999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf152 3.92275999643 0 87.88149885 2.0077517249999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 7 promise swing_level 6 8 promise swing_level 7 9 promise swing_level 5 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf153 4.32109087507 0 87.87050155 2.024247674999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf154 3.39437547281 0 88.3980003 1.2329995499999882 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 4 9 promise swing_level 5 10 promise swing_level 5 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 4 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf155 2.68997195525 0 88.15300025 1.6004996249999905 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 7 promise swing_level 5 -8 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf156 4.09121224125 0 88.002499775 1.8262503374999923 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 6 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf157 2.71748124247 0 88.203498925 1.5247516124999905 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 7 promise swing_level 5 -8 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf158 3.79392955242 0 88.50399955 1.0740006749999935 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 4 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf159 5.13475635542 0 88.0090003 1.8164995500000032 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf160 3.54797618601 0 88.251999825 1.4520002625000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 7 9 promise swing_level 4 10 promise swing_level 6 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 4 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf161 3.59382826036 0 88.361999925 1.2870001124999888 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf162 3.84395946571 0 87.938499625 1.9222505624999897 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 7 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf163 2.64475520546 0 88.724999975 0.7425000374999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 4 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf164 3.54062240207 0 87.826000675 2.0909989874999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 7 promise swing_level 6 8 promise swing_level 5 9 promise swing_level 5 10 promise swing_level 6 11 promise swing_level 7 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf165 4.95188675609 0 87.977000675 1.8644989874999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 3 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf166 3.99750955296 0 87.891999975 1.9920000374999915 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf167 2.88361282775 0 87.810000425 2.1149993625000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf168 3.2042728233 0 87.8145006 2.1082490999999948 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 5 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf169 4.7615819855 0 88.0465002 1.7602497000000028 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf170 2.79333396532 0 88.242000025 1.4669999625000045 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 12 promise swing_level 4 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf171 5.47569782913 0 87.973500275 1.8697495874999888 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf172 5.46690922425 0 87.995500725 1.8367489124999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf173 5.3783876796 0 87.992999325 1.840501012499999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf174 2.84111960368 0 88.248000225 1.4579996625000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 7 8 promise swing_level 7 9 promise swing_level 7 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 6 12 promise swing_level 4 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf175 4.72491330829 0 88.02350005 1.794749925000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf176 5.51735857539 0 87.863500825 2.0347487624999943 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf177 4.30863496096 0 88.0470007 1.7594989500000011 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 7 9 promise swing_level 5 10 promise swing_level 6 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf178 3.97624115754 0 88.179000275 1.5614995874999877 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 7 9 promise swing_level 5 10 promise swing_level 4 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf179 4.01900669565 0 87.961499725 1.8877504125000044 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 7 9 promise swing_level 5 10 promise swing_level 6 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf180 4.05718832389 0 88.103001 1.6754984999999891 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 7 promise swing_level 3 8 promise swing_level 7 9 promise swing_level 4 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf181 4.4721779046 0 87.604999175 2.4225012374999935 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 6 11 promise swing_level 7 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf182 5.20740625144 0 87.52999955 2.535000674999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 7 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf183 2.69815960592 0 87.4265 2.690249999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf184 4.69888006494 0 87.5189997 2.551500450000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf185 3.88329535198 0 87.9320007 1.9319989499999934 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 5 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 4 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf186 3.91055907108 0 87.590500775 2.4442488375000053 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 6 11 promise swing_level 7 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf187 4.07862338917 0 87.7545004 2.1982494000000017 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf188 4.22341448608 0 87.341498725 2.8177519125000074 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 7 promise swing_level 5 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 5 11 promise swing_level 6 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf189 5.85932654499 0 87.4095005 2.715749250000009 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf190 2.33630820793 0 87.58749975 2.448750374999989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 7 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf191 2.67193977712 0 88.205001225 1.5224981624999927 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 7 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf192 3.47931394928 0 87.508498975 2.567251537500006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf193 3.18892766897 0 87.7615004 2.187749399999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 7 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf194 5.5791406199 0 87.5825015 2.4562477499999886 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf195 4.32029366538 0 87.591999625 2.4420005624999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 7 promise swing_level 5 8 promise swing_level 6 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf196 4.04213127221 0 87.873001 2.020498499999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 6 -11 gpu conv perf 25 add fp32 1 relu fp32 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf197 4.11228700649 0 87.88050055 2.009249175000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 6 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf198 4.33281717738 0 87.8665005 2.030249249999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 @@ -3775,549 +3775,549 @@ conf198 4.33281717738 0 87.8665005 2.030249249999997 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf199 4.28622435323 0 87.7855013 2.1517480499999877 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 7 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf200 4.73610191123 0 87.494000025 2.58899996249999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 4 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf201 2.73431176691 0 87.317999075 2.8530013874999938 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 4 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 4 11 promise swing_level 3 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf202 4.74173611035 0 87.585500525 2.4517492124999976 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 7 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf203 4.39977520613 0 87.823999825 2.0940002624999963 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 3 10 promise swing_level 6 11 promise swing_level 7 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf204 5.45317053297 0 87.4604998 2.6392503000000076 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf205 3.6977748207 0 87.459499925 2.640750112499994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf206 2.71970367972 0 87.562499325 2.4862510124999915 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 4 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf207 4.64593590175 0 87.30700015 2.8694997750000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 6 8 promise swing_level 5 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 7 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf208 3.69092657028 0 87.7089999 2.266500150000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 7 9 promise swing_level 6 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 7 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf209 4.88508479889 0 87.574999825 2.467500262499989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf210 3.84703801278 0 87.6125004 2.4112493999999955 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf211 2.22605813358 0 87.835501025 2.0767484624999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 7 promise swing_level 7 8 promise swing_level 5 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf212 3.64643231622 0 87.6060001 2.420999849999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf213 4.39492393417 0 87.848999425 2.0565008624999948 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf214 5.28219109366 0 87.688000125 2.297999812499995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf215 2.89866398854 0 87.268999675 2.926500487499993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 7 promise swing_level 5 8 promise swing_level 4 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf216 4.51950617171 0 87.430500075 2.684249887500002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf217 3.65813348607 0 87.207999 3.018001499999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 3 10 promise swing_level 7 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf218 4.5332130836 0 87.65350015 2.3497497749999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 6 11 promise swing_level 7 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf219 3.67843710223 0 88.1550001 1.5974998500000055 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf220 5.58562815692 0 87.36900085 2.776498724999989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf221 3.23637931184 0 87.543501275 2.514748087500003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf222 2.35610274421 0 87.727000725 2.2394989125000038 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 4 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf223 2.4679461159 0 87.682001075 2.306998387499995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 4 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 4 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf224 2.95673533897 0 87.657000375 2.344499437500005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf225 3.22028294607 0 88.04250065 1.7662490250000076 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf226 4.17546908121 0 87.8365009 2.075248649999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 4 9 promise swing_level 7 10 promise swing_level 6 11 promise swing_level 7 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf227 4.07723269504 0 87.643000975 2.365498537499988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 4 8 promise swing_level 6 @@ -4326,549 +4326,549 @@ conf227 4.07723269504 0 87.643000975 2.365498537499988 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf228 3.23115080569 0 88.144500825 1.6132487625000067 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 8 promise swing_level 4 9 promise swing_level 7 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf229 4.0953860212 0 87.3284996 2.8372505999999973 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf230 5.47313823098 0 87.58550015 2.451749774999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf231 4.52121500093 0 87.243499575 2.9647506374999963 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf232 5.33635843446 0 87.597500075 2.4337498874999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf233 2.74684721992 0 88.021999425 1.797000862499992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv perf 25 add fp32 1 relu fp32 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf234 2.44273830473 0 88.0125003 1.8112495499999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 promise swing_level 3 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 promise swing_level 3 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 7 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf235 4.36525951388 0 87.90350075 1.9747488749999889 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 6 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf236 3.59928302884 0 87.6044997 2.4232504499999905 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf237 4.03937131433 0 87.47999975 2.6100003749999914 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 7 promise swing_level 4 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf238 2.33928390799 0 87.682500875 2.306248687499995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 promise swing_level 4 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf239 4.47552559764 0 87.408000075 2.7179998874999924 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 4 11 promise swing_level 3 -12 gpu conv perf 21 add fp32 1 relu fp32 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf240 2.70798818771 0 87.539000925 2.521498612500004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 7 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf241 4.15082903285 0 87.70000035 2.279999475000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 4 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf242 3.46524098586 0 87.652499925 2.351250112499997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf243 3.32180872343 0 87.74049925 2.2192511249999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 4 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 6 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf244 4.00757192103 0 88.249000775 1.4564988374999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf245 3.37045657617 0 88.31399955 1.35900067499999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 6 11 promise swing_level 6 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf246 3.12436215137 0 87.587499825 2.448750262500006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 5 8 promise swing_level 4 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf247 3.59118738511 0 88.1109997 1.663500450000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf248 4.10247107865 0 87.42699945 2.689500825000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 6 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 5 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf249 2.87602796318 0 87.925999475 1.941000787500002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf250 2.54540669669 0 87.66149995 2.337750074999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 5 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf251 3.91377251774 0 87.933001425 1.9304978624999976 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 8 promise swing_level 5 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf252 3.05530044192 0 88.06999955 1.7250006749999898 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf253 4.66223872225 0 87.56000035 2.4899994750000047 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf254 2.65477461582 0 87.826500375 2.090249437500006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf255 5.01203410477 0 87.48050035 2.6092494749999986 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 4 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf256 5.34829234153 0 87.372499325 2.771251012499988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 6 @@ -4876,37 +4876,37 @@ conf256 5.34829234153 0 87.372499325 2.771251012499988 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf257 5.75640231547 0 87.56199965 2.487000524999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 4 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf258 5.7494534426 0 87.395999925 2.736000112500001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 6 @@ -4914,75 +4914,75 @@ conf258 5.7494534426 0 87.395999925 2.736000112500001 10 promise swing_level 4 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf259 3.10970226214 0 87.762499375 2.1862509374999917 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 6 -11 gpu conv perf 25 add fp32 1 relu fp32 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 12 promise swing_level 4 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf260 4.87711499545 0 87.455000125 2.6474998125000013 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf261 3.12599570071 0 87.4755011 2.6167483499999946 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 7 promise swing_level 5 8 promise swing_level 4 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf262 5.4556585238 0 87.39849925 2.7322511249999977 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 @@ -4990,132 +4990,132 @@ conf262 5.4556585238 0 87.39849925 2.7322511249999977 10 promise swing_level 5 11 promise swing_level 3 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf263 4.38532184962 0 87.403999375 2.7240009375000014 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf264 5.73566444112 0 87.534500725 2.528248912499997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf265 4.69888006494 0 87.374998725 2.767501912500002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf266 3.02745293372 0 87.348499125 2.807251312499993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 4 11 promise swing_level 5 12 promise swing_level 6 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf267 5.80246187667 0 87.479500375 2.6107494374999973 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 5 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf268 2.63313227461 0 87.75899995 2.1915000749999933 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf269 4.39492393417 0 87.6785 2.312249999999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 5 @@ -5124,113 +5124,113 @@ conf269 4.39492393417 0 87.6785 2.312249999999999 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf270 4.79497565536 0 87.39950045 2.730749324999991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 7 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf271 4.58536300129 0 87.267999075 2.9280013874999895 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 6 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 7 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf272 3.16821829816 0 87.3795002 2.760749700000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 -3 gpu conv perf 28 add fp32 1 relu fp32 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 7 8 promise swing_level 5 9 promise swing_level 6 10 promise swing_level 6 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf273 3.50485239559 0 87.529999875 2.535000187499996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf274 2.94903997004 0 88.84799915 0.5580012749999881 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf275 4.58184903969 0 87.7619999 2.1870001499999887 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 5 8 promise swing_level 4 9 promise swing_level 4 @@ -5238,189 +5238,189 @@ conf275 4.58184903969 0 87.7619999 2.1870001499999887 11 promise swing_level 6 12 promise swing_level 6 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf276 2.15950317423 0 88.06700055 1.7294991749999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf277 3.11787958506 0 87.50199955 2.577000675000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 7 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf278 4.81234318399 0 87.468000175 2.627999737499998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf279 3.8788463178 0 87.358998725 2.7915019124999887 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 6 11 promise swing_level 7 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf280 2.80252726257 0 87.6980002 2.282999700000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 4 10 promise swing_level 5 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf281 3.73178125302 0 87.57250125 2.4712481249999954 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 7 promise swing_level 4 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 5 11 promise swing_level 4 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf282 2.44048953797 0 88.0640005 1.7339992499999894 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf283 4.53581069428 0 87.65200045 2.351999324999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf284 3.28980290109 0 87.592000875 2.4419986875000035 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf285 4.24815567065 0 87.872000675 2.0219989875000053 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 @@ -5428,835 +5428,835 @@ conf285 4.24815567065 0 87.872000675 2.0219989875000053 11 promise swing_level 7 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf286 5.6503204036 0 87.44849975 2.657250375000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 25 add fp32 1 relu fp32 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf287 2.94469804416 0 87.650500325 2.3542495125000045 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 4 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf288 3.30985565119 0 87.189999275 3.0450010874999904 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 6 12 promise swing_level 4 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf289 5.6503204036 0 87.47200085 2.6219987249999974 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf290 4.58184903969 0 87.8229998 2.0955002999999905 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 7 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf291 3.84764377729 0 87.338999375 2.821500937499998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 6 8 promise swing_level 4 9 promise swing_level 3 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf292 4.58184903969 0 87.470500275 2.624249587499989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 7 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf293 4.63153992523 0 87.589999975 2.4450000375000016 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 7 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf294 3.2250135894 0 88.1240004 1.6439993999999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 6 8 promise swing_level 4 9 promise swing_level 3 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf295 5.21533129885 0 87.54349955 2.5147506749999877 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf296 3.46069601443 0 87.913001 1.960498500000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf297 3.42651493643 0 87.34599955 2.8110006749999954 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 4 11 promise swing_level 5 12 promise swing_level 6 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf298 5.42600404155 0 87.6949997 2.287500450000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf299 2.67557779593 0 87.64700045 2.3594993250000087 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf300 2.72276052655 0 87.662500575 2.3362491375000047 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf301 4.89107926597 0 87.5904995 2.4442507499999877 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 4 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf302 2.64478009746 0 88.066000075 1.7309998874999906 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 7 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf303 4.40222210176 0 88.08450035 1.7032494749999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf304 5.53267545053 0 87.50399975 2.57400037499999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf305 3.87011200947 0 87.461500425 2.637749362500003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf306 5.20629673722 0 87.754500225 2.1982496624999968 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 4 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf307 3.11175118352 0 87.808500125 2.117249812500006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 6 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf308 4.7081305686 0 87.4815008 2.607748799999989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 -9 gpu conv samp 32 add fp32 1 relu fp32 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf309 4.22497022995 0 87.777999925 2.1630001124999936 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf310 3.03590788662 0 87.682499825 2.306250262500008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf311 3.12476169183 0 88.018500675 1.8022489875000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 -12 gpu conv perf 21 add fp32 1 relu fp32 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf312 3.33525315541 0 87.95000115 1.9049982749999899 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf313 2.63170199143 0 87.578000825 2.4629987624999927 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 7 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf314 4.30243389408 0 87.903500775 1.9747488375000017 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 6 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf315 5.6811635579 0 87.40649965 2.7202505249999973 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf316 2.75600685918 0 87.47199945 2.6220008250000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 8 promise swing_level 6 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf317 4.25571521296 0 87.866499525 2.030250712500006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 7 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf318 5.70556023134 0 87.472999575 2.620500637499994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf319 3.4803697203 0 87.45650025 2.645249624999991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 4 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf320 2.41170033166 0 87.8015011 2.1277483500000045 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 4 9 promise swing_level 7 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf321 2.91453798106 0 88.827998575 0.5880021375000055 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 6 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf322 5.21649308565 0 87.388500425 2.7472493624999927 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf323 4.0223721329 0 88.184000725 1.553998912499992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf324 2.68424189854 0 88.793500875 0.6397486874999885 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 7 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf325 2.83511438559 0 87.24449975 2.963250374999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf326 2.92808278063 0 87.37149925 2.772751124999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 7 promise swing_level 3 8 promise swing_level 3 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf327 4.24671116143 0 87.400498775 2.7292518374999943 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf328 3.15775123136 0 87.6400005 2.3699992499999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv perf 24 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv perf 24 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf329 4.36285352449 0 87.8960001 1.985999850000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 4 8 promise swing_level 5 9 promise swing_level 4 @@ -6264,151 +6264,151 @@ conf329 4.36285352449 0 87.8960001 1.985999850000006 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf330 3.06744036932 0 87.604000825 2.4239987624999983 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 7 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf331 3.51924960433 0 87.984001675 1.8539974874999956 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 7 11 promise swing_level 7 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf332 1.97887636823 0 87.70750155 2.268747674999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 4 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf333 3.66655805094 0 87.5700005 2.474999249999989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 7 -11 gpu conv perf 25 add fp32 1 relu fp32 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 12 promise swing_level 4 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf334 5.3292236196 0 87.3149995 2.85750075 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf335 3.49460954335 0 87.72250085 2.246248724999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 7 9 promise swing_level 4 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf336 3.87633150306 0 88.219 1.5015000000000072 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf337 4.77391816655 0 87.6929995 2.2905007499999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 @@ -6416,17 +6416,17 @@ conf337 4.77391816655 0 87.6929995 2.2905007499999996 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf338 5.46563323138 0 87.413000675 2.7104989874999887 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 6 8 promise swing_level 6 @@ -6434,94 +6434,94 @@ conf338 5.46563323138 0 87.413000675 2.7104989874999887 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf339 4.14507357871 0 88.238000125 1.4729998124999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf340 2.43724039851 0 87.390999425 2.7435008624999924 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 7 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 4 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf341 4.34622001395 0 87.8864993 2.000251050000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 6 -11 gpu conv perf 25 add fp32 1 relu fp32 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf342 3.20175368999 0 87.851000025 2.0534999624999912 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 6 11 promise swing_level 7 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf343 5.81657431633 0 87.398499175 2.7322512375000017 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 5 @@ -6529,132 +6529,132 @@ conf343 5.81657431633 0 87.398499175 2.7322512375000017 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf344 4.74361715684 0 87.3639998 2.7840002999999953 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 5 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf345 3.51722335677 0 87.509999625 2.565000562499989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 4 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf346 4.64144672088 0 87.335000075 2.8274998875000037 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf347 4.51268375266 0 87.593500525 2.439749212500004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 6 11 promise swing_level 7 12 promise swing_level 6 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf348 5.27287413815 0 87.855500025 2.0467499625000016 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf349 3.32318365683 0 88.109999825 1.6650002624999942 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf350 4.41928786908 0 87.621000125 2.398499812500006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 5 @@ -6663,720 +6663,720 @@ conf350 4.41928786908 0 87.621000125 2.398499812500006 11 promise swing_level 3 12 promise swing_level 4 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf351 3.07257198686 0 87.6390011 2.371498349999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf352 4.53244534626 0 87.325999125 2.841001312500005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 3 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf353 2.64478009746 0 87.81599965 2.106000525000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 7 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf354 4.47727265968 0 87.44850005 2.657249924999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf355 4.54700338998 0 87.613500275 2.409749587499988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 7 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf356 3.25049280868 0 88.09849965 1.6822505250000077 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 7 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 4 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf357 4.45554960441 0 88.16199915 1.5870012749999987 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf358 5.1994571495 0 87.562999675 2.4855004874999977 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf359 3.83161602186 0 87.80850105 2.1172484249999926 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 4 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf360 4.46549752052 0 87.55899985 2.4915002249999887 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 3 10 promise swing_level 6 11 promise swing_level 7 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf361 4.96417752313 0 87.63449995 2.3782500749999897 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 4 8 promise swing_level 7 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf362 4.69888006494 0 87.563000775 2.485498837499989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 -9 gpu conv samp 32 add fp32 1 relu fp32 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf363 3.21895495215 0 87.6415007 2.3677489500000064 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf364 4.40222210176 0 87.5285 2.5372500000000073 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf365 5.07700641199 0 87.9765005 1.865249249999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 25 add fp32 1 relu fp32 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf366 5.55581088584 0 87.463999175 2.6340012375000015 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 7 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf367 4.98071928914 0 87.302500375 2.8762494375000074 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 5 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf368 2.16754034151 0 87.47100095 2.623498574999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 4 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf369 4.18130931289 0 87.906500475 1.9702492874999962 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf370 3.82431559264 0 87.444000625 2.6639990624999967 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 5 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf371 2.95052626852 0 87.7895002 2.145749699999989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf372 5.44450677864 0 87.582001825 2.456997262499989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf373 3.1906294495 0 88.102000225 1.6769996625000019 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 3 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf374 4.45226698835 0 87.590500275 2.4442495875000034 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf375 4.64144672088 0 87.83549955 2.0767506750000067 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf376 2.70248005635 0 87.67299995 2.320500074999991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 6 -12 gpu conv perf 21 add fp32 1 relu fp32 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf377 4.28315376855 0 87.339999825 2.8200002624999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf378 3.20087839793 0 87.3874994 2.7487509000000045 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 7 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf379 3.59440287771 0 87.849001475 2.056497787500007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 5 9 promise swing_level 4 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf380 3.70804604786 0 88.114000725 1.658998912500003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf381 3.32964139314 0 88.226000775 1.490998837499994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 7 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf382 5.73566444112 0 87.413499375 2.7097509374999973 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf383 4.48223604196 0 87.598000075 2.432999887499996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf384 5.57521108066 0 87.5435005 2.5147492500000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf385 5.26240713464 0 87.630500275 2.384249587499994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf386 4.93956986982 0 87.2689999 2.9265001500000025 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 5 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 4 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf387 2.90603825734 0 87.765500175 2.181749737499999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 -9 gpu conv samp 32 add fp32 1 relu fp32 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf388 4.18130931289 0 88.0334995 1.779750749999991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 @@ -7384,151 +7384,151 @@ conf388 4.18130931289 0 88.0334995 1.779750749999991 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf389 3.48951053535 0 87.465000475 2.6324992875000035 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 4 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 4 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf390 3.17956425357 0 88.01350045 1.8097493250000056 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf391 2.59289736869 0 87.548500875 2.5072486874999953 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf392 5.53392845098 0 87.348500225 2.8072496625000056 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf393 5.63830296248 0 87.460500375 2.6392494375000055 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 5 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf394 4.84649698403 0 87.7850002 2.1524997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf395 4.83765289368 0 87.569999825 2.4750002625000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf396 5.85932654499 0 87.419998875 2.7000016874999915 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 5 @@ -7536,18 +7536,18 @@ conf396 5.85932654499 0 87.419998875 2.7000016874999915 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 4 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf397 4.11653407145 0 87.64749985 2.3587502249999943 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 @@ -7556,170 +7556,170 @@ conf397 4.11653407145 0 87.64749985 2.3587502249999943 11 promise swing_level 3 12 promise swing_level 6 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf398 2.55250487316 0 88.215500825 1.5067487624999885 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 4 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf399 5.58562815692 0 87.553999775 2.499000337500007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 5 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf400 5.57521108066 0 87.411 2.7134999999999962 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 5 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf401 2.91242348471 0 87.67149975 2.3227503750000054 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf402 3.5644026837 0 87.472500025 2.621249962499995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 4 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf403 5.20740625144 0 87.407999175 2.7180012374999976 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 7 10 promise swing_level 3 -11 gpu conv perf 25 add fp32 1 relu fp32 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf404 3.69429895335 0 87.891000225 1.9934996624999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 6 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf405 5.1524092578 0 87.271000525 2.9234992124999906 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf406 2.69515771806 0 87.5774995 2.4637507499999955 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv perf 25 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv perf 25 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 6 8 promise swing_level 7 9 promise swing_level 6 @@ -7727,18 +7727,18 @@ conf406 2.69515771806 0 87.5774995 2.4637507499999955 11 promise swing_level 3 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf407 3.24694352749 0 88.054499925 1.7482501124999956 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 4 8 promise swing_level 5 9 promise swing_level 4 @@ -7746,132 +7746,132 @@ conf407 3.24694352749 0 88.054499925 1.7482501124999956 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf408 5.6503204036 0 87.5479999 2.508000150000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf409 2.64389671848 0 88.029500575 1.785749137499998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 6 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf410 4.93142144318 0 87.293999525 2.8890007124999926 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf411 3.44077998003 0 87.917000375 1.9544994374999973 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 3 promise swing_level 7 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf412 4.89107926597 0 87.47749955 2.6137506749999915 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf413 3.87133831066 0 87.9260018 1.9409973000000065 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf414 4.83765289368 0 87.692000425 2.2919993624999933 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 @@ -7879,94 +7879,94 @@ conf414 4.83765289368 0 87.692000425 2.2919993624999933 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf415 2.7826430592 0 88.1980009 1.5329986500000032 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 5 8 promise swing_level 4 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf416 5.52370314346 0 87.568999675 2.4765004874999974 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf417 2.91313300915 0 87.745501175 2.2117482374999966 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 4 10 promise swing_level 6 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 4 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf418 4.11653407145 0 87.8790011 2.0114983500000037 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf419 4.89107926597 0 87.687001025 2.2994984624999972 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 @@ -7974,17 +7974,17 @@ conf419 4.89107926597 0 87.687001025 2.2994984624999972 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf420 4.16238802351 0 87.612000825 2.411998762500005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 @@ -7993,398 +7993,398 @@ conf420 4.16238802351 0 87.612000825 2.411998762500005 11 promise swing_level 3 12 promise swing_level 5 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf421 5.20061187391 0 87.76800095 2.1779985749999966 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf422 3.94545187015 0 87.30050005 2.8792499250000034 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf423 3.45622671318 0 87.34249955 2.816250674999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 9 promise swing_level 5 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf424 5.39169995164 0 87.344500975 2.8132485374999945 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf425 2.76174914808 0 87.21200065 3.0119990250000086 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 7 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 7 promise swing_level 3 8 promise swing_level 7 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf426 4.41276444601 0 87.6114991 2.4127513499999935 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 6 11 promise swing_level 7 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf427 4.15082903285 0 87.4785015 2.6122477500000088 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf428 4.44888355108 0 87.612000575 2.411999137500004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 6 11 promise swing_level 7 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf429 5.58562815692 0 87.249000775 2.9564988375 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf430 2.34157431888 0 87.835499775 2.0767503374999947 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 4 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf431 2.97029931618 0 87.401499975 2.7277500375000088 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 7 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 7 promise swing_level 5 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf432 5.61055342885 0 87.507500225 2.5687496624999966 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf433 2.76816971829 0 87.796999925 2.1345001125000067 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 5 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf434 3.10609309304 0 87.527001425 2.5394978625000064 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv perf 28 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +3 gpu conv perf 28 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 7 9 promise swing_level 4 10 promise swing_level 4 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf435 4.02914883006 0 87.9185005 1.9522492500000084 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 6 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf436 4.47887830633 0 87.831500275 2.082749587500004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 7 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf437 4.06546467919 0 88.1195005 1.650749249999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf438 2.80581525977 0 87.4994993 2.5807510500000035 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 7 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 4 11 promise swing_level 5 12 promise swing_level 6 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf439 3.54167107572 0 87.2524988 2.9512518000000014 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf440 4.51268375266 0 87.5539999 2.4990001500000076 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 6 11 promise swing_level 7 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf441 4.82010253458 0 87.6625014 2.3362479000000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 4 8 promise swing_level 7 9 promise swing_level 4 @@ -8392,131 +8392,131 @@ conf441 4.82010253458 0 87.6625014 2.3362479000000036 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf442 5.73566444112 0 87.538501025 2.522248462499995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf443 2.48821463818 0 87.589499825 2.445750262499992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 promise swing_level 4 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 promise swing_level 4 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf444 2.86742915865 0 87.9040003 1.9739995499999878 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf445 4.07167938936 0 87.750999625 2.2035005624999897 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 4 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf446 5.47313823098 0 87.5724998 2.471250299999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf447 4.18791473608 0 87.642001275 2.3669980875000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 7 promise swing_level 7 8 promise swing_level 3 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv perf 25 add fp32 1 relu fp32 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf448 4.35649552611 0 87.561500725 2.4877489124999954 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 @@ -8525,18 +8525,18 @@ conf448 4.35649552611 0 87.561500725 2.4877489124999954 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf449 4.52977856701 0 87.763000625 2.185499062499993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 5 8 promise swing_level 4 9 promise swing_level 4 @@ -8544,140 +8544,140 @@ conf449 4.52977856701 0 87.763000625 2.185499062499993 11 promise swing_level 6 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf450 4.93142144318 0 87.556500025 2.49524996249999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf451 3.10526931609 0 88.6169991 0.9045013499999968 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 4 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 6 11 promise swing_level 6 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf452 5.73566444112 0 87.433000725 2.680498912499999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 4 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf453 3.38240427328 0 88.184500025 1.55324996249999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 7 3 promise swing_level 6 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf454 5.37473575142 0 87.600000375 2.4299994375000082 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf455 4.81234318399 0 87.29249975 2.8912503749999914 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 8 promise swing_level 6 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf456 4.47385112487 0 87.9389998 1.921500299999991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 3 promise swing_level 6 4 promise swing_level 6 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 7 promise swing_level 4 8 promise swing_level 6 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar100/data/autotuner_data/tuner_confs_batch220.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar100/data/autotuner_data/tuner_confs_batch220.txt index 13ef55668b..93a000cc5b 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar100/data/autotuner_data/tuner_confs_batch220.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar100/data/autotuner_data/tuner_confs_batch220.txt @@ -19,7392 +19,7392 @@ conf1 1 0 68.41 0 ----- +++++ conf1 1.75386164622 0 67.82 0.8850000000000051 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf2 1.76576233649 0 67.82 0.8850000000000051 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 35 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 35 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf3 1.75759082188 0 68.020004 0.5849939999999947 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 35 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 35 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf4 1.68609979073 0 68.18 0.6299999999999898 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf5 1.72480010241 0 67.760002 0.9749969999999948 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 28 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 36 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 28 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 36 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf6 1.75671194279 0 67.760002 0.9749969999999948 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 29 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 34 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 29 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 34 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf7 1.93100141548 0 67.860001 0.8249984999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf8 1.80848474825 0 67.82 0.8850000000000051 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf9 1.76576233649 0 68.279999 0.530000999999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf10 1.78301390218 0 68.340004 0.4699960000000033 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf11 1.76576233649 0 69.220001 -0.4100009999999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf12 1.76576233649 0 68.139999 0.6700009999999935 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 36 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 36 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf13 1.77850065036 0 67.840004 0.8549940000000049 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf14 1.71511640935 0 67.879997 0.7950044999999903 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf15 1.76421055161 0 68.060005 0.5249924999999891 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 36 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 36 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf16 1.67126060797 0 68.300003 0.5099969999999928 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf17 1.93100141548 0 68.020004 0.5849939999999947 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf18 1.90818885609 0 67.840004 0.8549940000000049 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 36 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 33 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 36 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 33 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf19 1.88312901468 0 67.919998 0.7350029999999848 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf20 1.79991399207 0 68.0 0.6149999999999949 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf21 1.75759082188 0 68.259995 0.5500049999999931 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 35 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 35 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf22 1.7335238534 0 67.880005 0.7949924999999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf23 1.76576233649 0 69.12001 -0.3100099999999969 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 35 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 35 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf24 1.76576233649 0 68.340004 0.4699960000000033 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf25 1.92123320044 0 67.959999 0.6750015000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf26 1.74514922821 0 67.959999 0.6750015000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 35 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 35 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf27 1.76576233649 0 69.400002 -0.590002000000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf28 1.96091127177 0 68.040001 0.5549984999999893 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf29 1.80848474825 0 67.940002 0.7049969999999846 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf30 1.74212029597 0 67.819992 0.8850119999999961 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf31 1.79991399207 0 67.760002 0.9749969999999948 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf32 1.77468231224 0 68.159996 0.6500039999999899 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf33 1.71511640935 0 68.099998 0.46500299999999584 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf34 1.83613222344 0 68.080002 0.494997000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf35 1.8098822979 0 68.019997 0.5850044999999895 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 28 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 28 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf36 1.91078353522 0 68.039993 0.5550105000000016 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf37 1.72501183205 0 68.0 0.6149999999999949 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf38 1.80848474825 0 67.779999 0.9450014999999894 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf39 1.80848474825 0 68.0 0.6149999999999949 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf40 1.77468231224 0 67.839996 0.8550059999999959 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf41 1.79991399207 0 67.939995 0.7050075000000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf42 1.72501183205 0 67.919998 0.7350029999999848 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 35 add fp32 1 relu fp32 1 -10 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 35 add fp16 1 relu fp16 1 +10 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf43 1.80848474825 0 68.0 0.6149999999999949 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf44 1.95817876597 0 67.840004 0.8549940000000049 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 35 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 35 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf45 1.68609979073 0 68.160004 0.6499959999999959 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 34 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 34 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf46 1.76576233649 0 68.119995 0.6900049999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 34 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 34 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf47 1.75671194279 0 67.919998 0.7350029999999848 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 28 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 34 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 28 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 34 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf48 1.77468231224 0 68.099998 0.46500299999999584 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf49 1.87233994382 0 68.239998 0.5700019999999967 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf50 1.81385377753 0 68.5 0.3099999999999966 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf51 1.77468231224 0 67.900002 0.7649969999999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf52 1.68609979073 0 67.919998 0.7350029999999848 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf53 1.77468231224 0 67.860001 0.8249984999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf54 1.76576233649 0 68.880005 -0.07000500000000043 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf55 1.97108819589 0 67.840004 0.8549940000000049 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf56 1.72353081526 0 67.860001 0.8249984999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 30 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 36 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 30 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 36 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf57 1.73716695359 0 68.599998 0.21000199999999725 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf58 1.67864740553 0 68.099998 0.46500299999999584 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 35 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 35 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf59 1.80129832153 0 68.300003 0.5099969999999928 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf60 1.97108819589 0 67.82 0.8850000000000051 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf61 1.76576233649 0 69.259995 -0.4499950000000069 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 35 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 35 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf62 1.69423121755 0 68.099991 0.46501349999999064 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf63 1.93100141548 0 67.880005 0.7949924999999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf64 1.6239420394 0 68.660004 0.1499959999999959 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf65 1.79991399207 0 68.020004 0.5849939999999947 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf66 1.74212029597 0 68.0 0.6149999999999949 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf67 1.76576233649 0 67.860001 0.8249984999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 33 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 33 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf68 1.80848474825 0 67.839996 0.8550059999999959 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf69 1.80848474825 0 67.819992 0.8850119999999961 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf70 1.9204455126 0 67.919998 0.7350029999999848 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 36 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 36 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf71 1.71511640935 0 67.960007 0.6749894999999881 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf72 1.89251265611 0 68.160004 0.6499959999999959 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf73 1.74514922821 0 68.119995 0.6900049999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf74 1.71511640935 0 67.760002 0.9749969999999948 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 36 add fp32 1 relu fp32 1 -10 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 36 add fp16 1 relu fp16 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf75 1.76576233649 0 68.119995 0.6900049999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf76 1.80848474825 0 67.860001 0.8249984999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf77 1.76576233649 0 67.840004 0.8549940000000049 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf78 1.72501183205 0 68.039993 0.5550105000000016 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf79 1.79761149863 0 67.840004 0.8549940000000049 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf80 1.76576233649 0 67.919998 0.7350029999999848 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 33 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 33 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf81 1.95817876597 0 67.880005 0.7949924999999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 35 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 35 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf82 1.82729805085 0 68.480003 0.3299970000000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf83 1.97108819589 0 68.199997 0.6100030000000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf84 1.78210942024 0 68.199997 0.6100030000000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 34 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 34 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf85 1.82729805085 0 68.259995 0.5500049999999931 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf86 1.77468231224 0 67.840004 0.8549940000000049 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf87 1.65668034261 0 68.719994 0.09000599999999681 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf88 1.7335238534 0 68.059998 0.5250030000000052 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf89 1.72501183205 0 68.199997 0.6100030000000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf90 1.7335238534 0 67.82 0.8850000000000051 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf91 1.87233994382 0 68.080002 0.494997000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf92 1.97108819589 0 67.940002 0.7049969999999846 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf93 1.7335238534 0 67.959999 0.6750015000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf94 1.72501183205 0 68.080002 0.494997000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf95 1.76576233649 0 69.020004 -0.21000400000000352 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf96 1.76576233649 0 68.020004 0.5849939999999947 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 35 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 35 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf97 1.77468231224 0 67.800003 0.9149954999999892 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf98 1.75671194279 0 68.119995 0.6900049999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 28 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 28 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf99 1.7335238534 0 69.0 -0.1900000000000034 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf100 1.78686821524 0 68.200005 0.6099949999999922 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv fp16 1 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv fp16 1 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf101 1.76642822315 0 68.0 0.6149999999999949 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf102 1.76576233649 0 67.980003 0.6449955000000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf103 1.76642822315 0 67.900002 0.7649969999999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf104 1.7335238534 0 68.359993 0.4500069999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf105 1.96832727362 0 67.800003 0.9149954999999892 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf106 1.79761149863 0 67.840004 0.8549940000000049 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 35 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 35 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf107 1.96091127177 0 67.900002 0.7649969999999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf108 1.76642822315 0 67.800003 0.9149954999999892 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf109 1.79761149863 0 68.040001 0.5549984999999893 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf110 1.9408694681 0 68.060005 0.5249924999999891 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf111 1.7335238534 0 68.439995 0.3700050000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf112 1.76576233649 0 68.68 0.1299999999999898 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf113 1.76576233649 0 67.879997 0.7950044999999903 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 36 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 36 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf114 1.97108819589 0 67.900002 0.7649969999999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf115 1.76576233649 0 67.779999 0.9450014999999894 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf116 1.79991399207 0 67.879997 0.7950044999999903 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf117 1.96091127177 0 67.979996 0.6450059999999951 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf118 1.79991399207 0 67.780006 0.9449909999999946 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf119 1.6977108722 0 68.720001 0.08999900000000027 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf120 1.63281063836 0 68.32 0.49000000000000343 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf121 1.87233994382 0 67.940002 0.7049969999999846 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf122 1.80848474825 0 67.979996 0.6450059999999951 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf123 1.76576233649 0 69.139999 -0.3299990000000065 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf124 1.72501183205 0 68.959999 -0.14999899999999966 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 35 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 35 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 35 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 35 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf125 1.68609979073 0 69.120003 -0.31000300000000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf126 1.74212029597 0 67.900002 0.7649969999999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf127 1.97108819589 0 67.919998 0.7350029999999848 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf128 1.79899228725 0 67.959999 0.6750015000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 28 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 34 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 28 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 34 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf129 1.93100141548 0 67.900002 0.7649969999999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf130 1.72501183205 0 68.599998 0.21000199999999725 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 35 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 35 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf131 1.76421055161 0 67.919998 0.7350029999999848 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 36 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 36 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf132 1.75671194279 0 67.939995 0.7050075000000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 28 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 34 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 28 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 34 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf133 1.95817876597 0 68.0 0.6149999999999949 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf134 1.79761149863 0 67.900002 0.7649969999999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 36 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 36 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf135 1.80848474825 0 68.300003 0.5099969999999928 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf136 1.71511640935 0 68.080002 0.494997000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 35 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 35 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf137 1.80848474825 0 67.879997 0.7950044999999903 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf138 1.97108819589 0 67.840004 0.8549940000000049 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf139 1.97108819589 0 67.779999 0.9450014999999894 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf140 1.76642822315 0 68.0 0.6149999999999949 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf141 1.75539527097 0 68.099998 0.46500299999999584 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 36 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 36 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf142 1.80848474825 0 67.840004 0.8549940000000049 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf143 1.71574463884 0 67.240005 1.7549925000000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf144 1.79991399207 0 67.240005 1.7549925000000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf145 1.75080242194 0 67.260002 1.7249969999999948 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 29 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 30 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 29 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 30 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf146 1.73845641226 0 67.159996 1.8750059999999849 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 29 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 29 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf147 1.80060589073 0 67.120003 1.9349954999999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 24 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 24 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf148 1.83063069527 0 67.239998 1.755002999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf149 1.79830162789 0 67.199997 1.8150045000000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf150 1.80060589073 0 67.260002 1.7249969999999948 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 29 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 29 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf151 1.77021108765 0 67.940002 0.7049969999999846 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 34 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 34 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf152 1.84432575845 0 67.18 1.8449999999999847 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf153 1.80060589073 0 67.259995 1.7250074999999896 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf154 1.84190831428 0 67.180008 1.8449879999999936 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf155 1.80918325318 0 67.159996 1.8750059999999849 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf156 1.85332574639 0 67.480003 1.3949955000000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf157 1.80848474825 0 67.220009 1.784986499999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf158 1.80918325318 0 67.220001 1.7849985000000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf159 1.84432575845 0 67.900002 0.7649969999999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf160 1.74060976926 0 67.159996 1.8750059999999849 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 25 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 25 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf161 1.84432575845 0 67.319992 1.6350119999999961 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf162 1.76487526817 0 67.120003 1.9349954999999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf163 1.78143166076 0 67.919998 0.7350029999999848 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf164 1.80918325318 0 67.199997 1.8150045000000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf165 1.84190831428 0 67.12001 1.9349850000000046 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf166 1.75825055853 0 67.340004 1.604994000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf167 1.84190831428 0 67.259995 1.7250074999999896 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf168 1.75891079064 0 67.279991 1.6950135000000017 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf169 1.80918325318 0 67.220001 1.7849985000000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf170 1.79600325511 0 67.18 1.8449999999999847 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf171 1.7921094753 0 67.259995 1.7250074999999896 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 30 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 30 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf172 1.7494945904 0 67.5 1.3649999999999949 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 23 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 23 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf173 1.68731452527 0 67.5 1.3649999999999949 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 25 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 25 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf174 1.75080242194 0 67.159996 1.8750059999999849 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 29 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 29 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf175 1.84190831428 0 67.220001 1.7849985000000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf176 1.85332574639 0 67.520004 1.3349939999999947 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf177 1.83541275828 0 67.139999 1.9050014999999902 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf178 1.81784272497 0 67.279991 1.6950135000000017 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf179 1.84432575845 0 67.280006 1.6949909999999946 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf180 1.81784272497 0 67.439995 1.4550075000000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf181 1.75759082188 0 67.260002 1.7249969999999948 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf182 1.79991399207 0 68.319992 0.49000799999999745 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf183 1.76709461222 0 67.220009 1.784986499999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 29 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 29 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf184 1.81784272497 0 67.139999 1.9050014999999902 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf185 1.84432575845 0 67.620003 1.1849954999999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf186 1.74060976926 0 67.159996 1.8750059999999849 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 25 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 25 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf187 1.80848474825 0 68.059998 0.5250030000000052 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf188 1.81784272497 0 67.219994 1.7850089999999952 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf189 1.80060589073 0 68.019997 0.5850044999999895 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf190 1.74060976926 0 67.139999 1.9050014999999902 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 29 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 34 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 29 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 34 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf191 1.85332574639 0 67.199997 1.8150045000000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf192 1.81784272497 0 67.260002 1.7249969999999948 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf193 1.76576233649 0 67.68 1.0949999999999847 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 34 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 34 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf194 1.80848474825 0 67.18 1.8449999999999847 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf195 1.79830162789 0 67.219994 1.7850089999999952 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf196 1.80918325318 0 67.180008 1.8449879999999936 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf197 1.79991399207 0 67.379997 1.5450044999999903 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf198 1.78301390218 0 67.279999 1.6950014999999894 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 30 add fp32 1 relu fp32 1 -9 gpu conv samp 33 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 30 add fp16 1 relu fp16 1 +9 gpu conv samp 33 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf199 1.79991399207 0 67.839996 0.8550059999999959 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf200 1.84432575845 0 67.18 1.8449999999999847 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf201 1.85332574639 0 67.659996 1.1250059999999849 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf202 1.76709461222 0 67.220001 1.7849985000000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf203 1.76865147708 0 67.959999 0.6750015000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf204 1.74645056866 0 67.279999 1.6950014999999894 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 28 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 28 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf205 1.79142408964 0 67.940002 0.7049969999999846 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 35 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 35 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf206 1.85332574639 0 67.560005 1.274992499999989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf207 1.80060589073 0 67.220001 1.7849985000000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf208 1.81949033684 0 67.760002 0.9749969999999948 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 36 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 36 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf209 1.80848474825 0 68.099998 0.46500299999999584 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf210 1.66315080178 0 67.199997 1.8150045000000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 25 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 25 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf211 1.74645056866 0 67.139999 1.9050014999999902 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 29 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 29 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf212 1.79600325511 0 67.220001 1.7849985000000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf213 1.80685699907 0 67.219994 1.7850089999999952 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf214 1.69280256099 0 67.199997 1.8150045000000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 25 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 25 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf215 1.80848474825 0 67.579994 1.245008999999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf216 1.85332574639 0 67.240005 1.7549925000000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf217 1.84432575845 0 67.159996 1.8750059999999849 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf218 1.67245404754 0 67.199997 1.8150045000000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 25 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 25 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf219 1.75671194279 0 67.139999 1.9050014999999902 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf220 1.71721229896 0 67.279999 1.6950014999999894 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf221 1.80060589073 0 67.300003 1.6649954999999892 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf222 1.80685699907 0 67.240005 1.7549925000000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf223 1.85332574639 0 67.18 1.8449999999999847 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf224 1.67245404754 0 67.180008 1.8449879999999936 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 25 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 25 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf225 1.76266149181 0 67.139999 1.9050014999999902 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf226 1.80848474825 0 67.960007 0.6749894999999881 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf227 1.84432575845 0 67.160004 1.8749939999999938 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf228 1.75891079064 0 67.279991 1.6950135000000017 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf229 1.84432575845 0 67.639999 1.1550014999999902 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf230 1.71847229328 0 67.760002 0.9749969999999948 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 29 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 29 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf231 1.85332574639 0 67.740005 1.0049925000000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf232 1.70099892334 0 67.719994 1.0350089999999952 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 25 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 25 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf233 2.12520432933 0 66.580002 2.744997000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 36 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 36 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf234 1.88237225557 0 67.18 1.8449999999999847 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf235 1.9204455126 0 67.439995 1.4550075000000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf236 1.87233994382 0 67.05999 2.0250149999999962 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf237 2.12199513594 0 66.580002 2.744997000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf238 1.9204455126 0 66.599998 2.715002999999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 26 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 26 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf239 1.88161610445 0 68.900002 -0.09000200000000402 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf240 1.89608765615 0 67.259995 1.7250074999999896 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv fp16 1 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv fp16 1 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf241 1.91782454487 0 66.599991 2.7150134999999906 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf242 1.78369286638 0 67.139999 1.9050014999999902 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf243 1.84991003462 0 67.400002 1.514996999999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 29 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 34 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 29 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 34 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf244 1.8606877721 0 67.020004 2.0849939999999947 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 35 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 35 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 35 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 35 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf245 1.9204455126 0 66.599998 2.715002999999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf246 1.86810851989 0 66.620003 2.6849954999999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 24 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 24 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf247 1.86315479542 0 67.080002 1.994997000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf248 1.88237225557 0 67.479996 1.395005999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf249 2.00311904721 0 66.639999 2.6550014999999902 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf250 1.93020569918 0 66.559998 2.775003000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf251 1.93020569918 0 66.599998 2.715002999999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf252 1.9204455126 0 66.639999 2.6550014999999902 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 30 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 30 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf253 1.87308865635 0 66.819992 2.385011999999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf254 1.87308865635 0 66.659996 2.625005999999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf255 1.8052321774 0 66.580002 2.744997000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv fp16 1 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv fp16 1 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf256 1.76842889988 0 66.5 2.864999999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 24 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 24 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf257 2.11656169377 0 66.5 2.864999999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf258 1.91078353522 0 67.280006 1.6949909999999946 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf259 1.99260956046 0 66.520004 2.8349939999999947 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf260 1.98978806184 0 67.340004 1.604994000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf261 1.88237225557 0 66.479996 2.895005999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf262 1.9204455126 0 66.82 2.385000000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf263 2.00026772891 0 67.18 1.8449999999999847 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 36 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 36 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf264 1.82872484006 0 67.199997 1.8150045000000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf265 2.00798498953 0 66.639999 2.6550014999999902 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf266 1.87059527344 0 67.120003 1.9349954999999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 34 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 34 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf267 1.82658549081 0 67.619995 1.1850074999999904 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf268 2.14041849205 0 66.699997 2.5650045000000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf269 2.14041849205 0 66.479996 2.895005999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf270 2.00311904721 0 67.479996 1.395005999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf271 1.86315479542 0 67.220001 1.7849985000000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf272 1.87233994382 0 68.240005 0.5699950000000001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf273 1.9204455126 0 66.559998 2.775003000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf274 1.89098463675 0 66.959999 2.1750015000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf275 1.9204455126 0 67.279999 1.6950014999999894 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf276 1.9408694681 0 67.180008 1.8449879999999936 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf277 1.89608765615 0 67.499992 1.365011999999986 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf278 1.87308865635 0 67.180008 1.8449879999999936 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf279 1.87985411209 0 66.680008 2.5949879999999936 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf280 1.86142719283 0 66.719994 2.535008999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 34 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 34 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf281 1.99260956046 0 67.400002 1.514996999999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf282 1.80825203311 0 67.020004 2.0849939999999947 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 35 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 35 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf283 1.90121829214 0 66.459999 2.9250015000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf284 1.91078353522 0 67.459999 1.4250015000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf285 1.85479348724 0 66.580002 2.744997000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 24 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 24 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf286 2.09229584083 0 67.659996 1.1250059999999849 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf287 1.90121829214 0 66.740005 2.5049925 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 23 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 23 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf288 1.91078353522 0 67.300003 1.6649954999999892 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf289 1.99458937525 0 67.18 1.8449999999999847 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf290 2.06341640946 0 66.68 2.5949999999999847 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 36 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 36 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf291 1.9204455126 0 67.139999 1.9050014999999902 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf292 2.09323084228 0 66.860001 2.3249984999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf293 1.84505222959 0 66.62001 2.6849850000000046 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 30 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 30 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf294 1.83373619691 0 66.560005 2.774992499999989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf295 1.83134641553 0 67.919998 0.7350029999999848 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf296 1.99260956046 0 66.459999 2.9250015000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf297 1.83613222344 0 67.919998 0.7350029999999848 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 35 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 35 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf298 1.80685699907 0 67.699997 1.0650045000000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 36 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 36 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf299 1.93020569918 0 66.599998 2.715002999999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf300 2.13716322068 0 66.519997 2.8350044999999895 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf301 2.15254971042 0 66.699997 2.5650045000000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf302 1.77602808742 0 67.400009 1.5149864999999991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf303 1.85896473944 0 67.479996 1.395005999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf304 1.72543544732 0 66.480003 2.8949955000000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 24 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 24 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf305 1.9204455126 0 66.599998 2.715002999999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf306 1.87233994382 0 66.460007 2.924989499999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 34 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 34 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf307 2.09854500865 0 66.639999 2.6550014999999902 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf308 1.88237225557 0 66.520004 2.8349939999999947 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 30 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 30 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf309 1.90121829214 0 67.520004 1.3349939999999947 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf310 1.93020569918 0 66.419998 2.9850029999999848 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 22 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 22 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf311 1.89608765615 0 66.82 2.385000000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf312 1.86142719283 0 66.860001 2.3249984999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf313 1.93020569918 0 66.639999 2.6550014999999902 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf314 1.9204455126 0 67.260002 1.7249969999999948 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf315 1.84021987061 0 66.499992 2.865011999999986 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 34 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 34 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf316 1.9204455126 0 67.32 1.6350000000000051 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf317 1.84577927325 0 67.459999 1.4250015000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf318 1.88237225557 0 66.419998 2.9850029999999848 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 24 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 24 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf319 1.87233994382 0 67.059998 2.025003000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf320 1.88161610445 0 66.620003 2.6849954999999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 36 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 36 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf321 1.97941763118 0 67.040001 2.0549984999999893 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 36 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 36 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf322 1.8555282298 0 67.559998 1.2750030000000052 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf323 2.14041849205 0 66.459999 2.9250015000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf324 1.8606877721 0 66.460007 2.924989499999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf325 1.93020569918 0 66.639999 2.6550014999999902 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 22 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 22 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf326 1.88237225557 0 67.219994 1.7850089999999952 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf327 1.91078353522 0 66.800003 2.414995499999989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf328 1.88161610445 0 66.959999 2.1750015000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf329 1.84505222959 0 66.82 2.385000000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 36 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 36 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf330 1.80060589073 0 66.459999 2.9250015000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf331 1.82658549081 0 67.159996 1.8750059999999849 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf332 1.88237225557 0 67.120003 1.9349954999999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf333 2.00311904721 0 67.520004 1.3349939999999947 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf334 1.87233994382 0 68.439995 0.3700050000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf335 2.09323084228 0 66.480003 2.8949955000000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf336 1.90818885609 0 66.800003 2.414995499999989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 34 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 34 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf337 1.87985411209 0 66.580002 2.744997000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 25 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 25 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf338 1.90818885609 0 67.020004 2.0849939999999947 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf339 1.87308865635 0 66.559998 2.775003000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 26 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 26 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf340 1.86810851989 0 66.980003 2.1449955000000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 23 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv fp16 1 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 23 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv fp16 1 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf341 1.99260956046 0 67.520004 1.3349939999999947 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf342 1.90121829214 0 66.819992 2.385011999999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 29 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 29 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf343 1.78686821524 0 66.599998 2.715002999999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 35 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 35 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf344 1.78369286638 0 68.459999 0.35000100000000034 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf345 1.87308865635 0 67.0 2.114999999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf346 2.14041849205 0 66.560005 2.774992499999989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf347 1.87233994382 0 66.699997 2.5650045000000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 36 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 36 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf348 1.90560121411 0 66.979996 2.145005999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf349 1.87233994382 0 67.720001 1.0349985000000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf350 1.9204455126 0 67.300003 1.6649954999999892 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf351 1.85479348724 0 68.040001 0.5549984999999893 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf352 2.15254971042 0 66.479996 2.895005999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf353 1.85479348724 0 66.479996 2.895005999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 29 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 29 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf354 1.88161610445 0 68.139999 0.6700009999999935 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 35 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 35 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf355 1.81784272497 0 67.300003 1.6649954999999892 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf356 1.93020569918 0 66.619995 2.6850074999999904 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf357 1.8606877721 0 67.259995 1.7250074999999896 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 25 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 25 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf358 1.87233994382 0 67.599998 1.2150029999999958 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 35 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 35 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf359 1.86389617859 0 66.819992 2.385011999999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 28 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 28 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf360 1.72755664833 0 66.480003 2.8949955000000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 24 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 24 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf361 1.90121829214 0 66.720001 2.5349985000000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 24 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 24 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf362 2.07867782721 0 66.979996 2.145005999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf363 1.82492500993 0 67.419998 1.4850029999999848 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv perf 30 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv perf 30 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf364 1.74341712137 0 66.759995 2.4750074999999896 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf365 1.78369286638 0 67.259995 1.7250074999999896 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv fp16 1 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv fp16 1 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf366 1.93020569918 0 66.760002 2.474996999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf367 1.88237225557 0 66.599998 2.715002999999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf368 1.90121829214 0 67.019997 2.0850044999999895 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv perf 25 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv perf 25 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf369 1.9204455126 0 66.599998 2.715002999999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 27 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 27 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf370 1.8606877721 0 66.599998 2.715002999999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 35 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 35 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf371 1.87233994382 0 67.840004 0.8549940000000049 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf372 1.96173251121 0 66.459999 2.9250015000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf373 1.9204455126 0 66.900002 2.264996999999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf374 1.9204455126 0 66.759995 2.4750074999999896 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 23 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 23 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf375 1.93020569918 0 66.5 2.864999999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf376 1.83541275828 0 66.5 2.864999999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf377 1.9204455126 0 67.319992 1.6350119999999961 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf378 1.98978806184 0 67.259995 1.7250074999999896 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf379 1.87059527344 0 67.080002 1.994997000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf380 2.03532335494 0 66.940002 2.2049969999999846 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf381 1.81925478089 0 67.520004 1.3349939999999947 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf382 1.9408694681 0 67.019997 2.0850044999999895 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 36 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 36 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf383 1.91782454487 0 66.899994 2.265008999999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv perf 21 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 36 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv perf 21 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 36 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf384 2.01373998071 0 67.459999 1.4250015000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf385 1.9408694681 0 68.139999 0.6700009999999935 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf386 1.91078353522 0 67.300003 1.6649954999999892 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf387 2.08083245943 0 67.659996 1.1250059999999849 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf388 1.82872484006 0 67.419998 1.4850029999999848 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 29 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 29 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf389 1.9204455126 0 67.340004 1.604994000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 31 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 31 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar100/data/autotuner_data/tuner_pareto_confs_batch220.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar100/data/autotuner_data/tuner_pareto_confs_batch220.txt index 331906b533..760701ff25 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar100/data/autotuner_data/tuner_pareto_confs_batch220.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar100/data/autotuner_data/tuner_pareto_confs_batch220.txt @@ -19,1882 +19,1882 @@ conf1 1 0 68.41 0 ----- +++++ conf1 1.93100141548 0 67.860001 0.8249984999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf2 1.78301390218 0 68.340004 0.4699960000000033 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf3 1.76576233649 0 69.220001 -0.4100009999999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf4 1.93100141548 0 68.020004 0.5849939999999947 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf5 1.76576233649 0 69.12001 -0.3100099999999969 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 35 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 35 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf6 1.92123320044 0 67.959999 0.6750015000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf7 1.76576233649 0 69.400002 -0.590002000000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf8 1.96091127177 0 68.040001 0.5549984999999893 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf9 1.95817876597 0 67.840004 0.8549940000000049 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 35 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 35 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf10 1.81385377753 0 68.5 0.3099999999999966 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf11 1.76576233649 0 68.880005 -0.07000500000000043 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf12 1.97108819589 0 67.840004 0.8549940000000049 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf13 1.80129832153 0 68.300003 0.5099969999999928 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf14 1.97108819589 0 67.82 0.8850000000000051 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf15 1.76576233649 0 69.259995 -0.4499950000000069 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 35 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 35 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf16 1.93100141548 0 67.880005 0.7949924999999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf17 1.95817876597 0 67.880005 0.7949924999999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 35 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 35 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf18 1.82729805085 0 68.480003 0.3299970000000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf19 1.97108819589 0 68.199997 0.6100030000000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf20 1.82729805085 0 68.259995 0.5500049999999931 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf21 1.97108819589 0 67.940002 0.7049969999999846 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf22 1.76576233649 0 69.020004 -0.21000400000000352 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf23 1.7335238534 0 69.0 -0.1900000000000034 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf24 1.96832727362 0 67.800003 0.9149954999999892 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf25 1.96091127177 0 67.900002 0.7649969999999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf26 1.9408694681 0 68.060005 0.5249924999999891 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf27 1.76576233649 0 68.68 0.1299999999999898 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf28 1.97108819589 0 67.900002 0.7649969999999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf29 1.96091127177 0 67.979996 0.6450059999999951 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf30 1.76576233649 0 69.139999 -0.3299990000000065 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf31 1.72501183205 0 68.959999 -0.14999899999999966 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 35 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 35 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 35 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 35 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf32 1.97108819589 0 67.919998 0.7350029999999848 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf33 1.93100141548 0 67.900002 0.7649969999999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf34 1.95817876597 0 68.0 0.6149999999999949 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf35 1.80848474825 0 68.300003 0.5099969999999928 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf36 1.97108819589 0 67.840004 0.8549940000000049 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf37 1.97108819589 0 67.779999 0.9450014999999894 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 31 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 31 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf38 1.83063069527 0 67.239998 1.755002999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf39 1.84432575845 0 67.18 1.8449999999999847 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf40 1.84190831428 0 67.180008 1.8449879999999936 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf41 1.80918325318 0 67.159996 1.8750059999999849 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf42 1.85332574639 0 67.480003 1.3949955000000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf43 1.80848474825 0 67.220009 1.784986499999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf44 1.80918325318 0 67.220001 1.7849985000000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf45 1.84432575845 0 67.900002 0.7649969999999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf46 1.84432575845 0 67.319992 1.6350119999999961 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf47 1.80918325318 0 67.199997 1.8150045000000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf48 1.84190831428 0 67.12001 1.9349850000000046 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf49 1.84190831428 0 67.259995 1.7250074999999896 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf50 1.80918325318 0 67.220001 1.7849985000000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf51 1.84190831428 0 67.220001 1.7849985000000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf52 1.85332574639 0 67.520004 1.3349939999999947 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf53 1.83541275828 0 67.139999 1.9050014999999902 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf54 1.81784272497 0 67.279991 1.6950135000000017 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf55 1.84432575845 0 67.280006 1.6949909999999946 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf56 1.81784272497 0 67.439995 1.4550075000000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf57 1.79991399207 0 68.319992 0.49000799999999745 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf58 1.81784272497 0 67.139999 1.9050014999999902 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf59 1.84432575845 0 67.620003 1.1849954999999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf60 1.80848474825 0 68.059998 0.5250030000000052 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf61 1.81784272497 0 67.219994 1.7850089999999952 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf62 1.80060589073 0 68.019997 0.5850044999999895 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf63 1.85332574639 0 67.199997 1.8150045000000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf64 1.81784272497 0 67.260002 1.7249969999999948 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf65 1.80848474825 0 67.18 1.8449999999999847 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf66 1.80918325318 0 67.180008 1.8449879999999936 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf67 1.84432575845 0 67.18 1.8449999999999847 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf68 1.85332574639 0 67.659996 1.1250059999999849 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf69 1.85332574639 0 67.560005 1.274992499999989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf70 1.81949033684 0 67.760002 0.9749969999999948 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 36 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 36 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf71 1.80848474825 0 68.099998 0.46500299999999584 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf72 1.80685699907 0 67.219994 1.7850089999999952 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf73 1.80848474825 0 67.579994 1.245008999999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf74 1.85332574639 0 67.240005 1.7549925000000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf75 1.84432575845 0 67.159996 1.8750059999999849 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf76 1.80685699907 0 67.240005 1.7549925000000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf77 1.85332574639 0 67.18 1.8449999999999847 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf78 1.80848474825 0 67.960007 0.6749894999999881 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf79 1.84432575845 0 67.160004 1.8749939999999938 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf80 1.84432575845 0 67.639999 1.1550014999999902 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf81 1.85332574639 0 67.740005 1.0049925000000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf82 2.12520432933 0 66.580002 2.744997000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 36 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 36 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf83 2.12199513594 0 66.580002 2.744997000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf84 1.88161610445 0 68.900002 -0.09000200000000402 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf85 2.11656169377 0 66.5 2.864999999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf86 2.14041849205 0 66.699997 2.5650045000000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf87 2.14041849205 0 66.479996 2.895005999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf88 1.87233994382 0 68.240005 0.5699950000000001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf89 2.09229584083 0 67.659996 1.1250059999999849 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf90 2.09323084228 0 66.860001 2.3249984999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf91 2.13716322068 0 66.519997 2.8350044999999895 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf92 2.15254971042 0 66.699997 2.5650045000000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf93 2.14041849205 0 66.459999 2.9250015000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf94 1.87233994382 0 68.439995 0.3700050000000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf95 2.14041849205 0 66.560005 2.774992499999989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf96 2.15254971042 0 66.479996 2.895005999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf97 2.07867782721 0 66.979996 2.145005999999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf98 1.9408694681 0 68.139999 0.6700009999999935 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf99 2.08083245943 0 67.659996 1.1250059999999849 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar100/data/autotuner_data/tuner_promise_confs_batch220_multi.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar100/data/autotuner_data/tuner_promise_confs_batch220_multi.txt index db4c598b3a..c0bbf8d3b7 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar100/data/autotuner_data/tuner_promise_confs_batch220_multi.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar100/data/autotuner_data/tuner_promise_confs_batch220_multi.txt @@ -19,8627 +19,8627 @@ conf1 1 0 68.41 0 ----- +++++ conf1 2.37505771925 0 68.38250025 0.42749974999998985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf2 2.53623440271 0 68.26416685 0.5458331500000014 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf3 2.50805597336 0 67.444999975 1.4475000374999922 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 5 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf4 1.87028406725 0 67.987500325 0.6337495124999961 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf5 2.51647733529 0 67.47416665 1.4037500249999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 3 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf6 2.15701027658 0 67.28250065 1.6912490249999905 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf7 2.50805597336 0 67.236666925 1.7599996125000033 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 5 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf8 2.0928411568 0 67.31249885 1.646251724999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf9 2.1075938688 0 67.38916625 1.5312506249999913 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf10 2.19340577772 0 67.672499775 1.1062503374999864 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf11 2.31661988654 0 67.461666075 1.4225008875 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf12 2.47612599473 0 67.45333285 1.4350007250000019 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 3 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf13 2.1377321782 0 67.3199999 1.6350001499999962 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf14 2.57138969704 0 67.4158337 1.491249450000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 3 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf15 1.87028406725 0 67.50083225 1.363751624999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 5 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf16 2.29880815342 0 67.235832975 1.761250537500004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf17 2.09964245296 0 67.66749975 1.113750374999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf18 1.76017762247 0 68.563333875 0.24666612499999874 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf19 2.39071529595 0 67.270833775 1.7087493374999951 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf20 2.08191145143 0 67.6383331 1.1575003499999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf21 1.81397084963 0 68.379166875 0.4308331250000009 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 36 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 36 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf22 1.84106370533 0 68.340000175 0.4699998249999965 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf23 2.29880815342 0 67.1975014 1.8187479000000053 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf24 2.11131460224 0 67.617500825 1.1887487625000048 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf25 2.05349604879 0 67.760000125 0.9749998124999877 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf26 2.57610361521 0 67.120834175 1.9337487374999967 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 5 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf27 2.34317715598 0 67.430834475 1.4687482874999986 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf28 2.72214428244 0 67.03166715 2.0674992749999888 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf29 2.89301601169 0 67.31666755 1.6399986749999869 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 12 promise swing_level 6 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf30 3.19819632002 0 67.2583336 1.7274995999999945 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf31 3.11489774116 0 66.6758325 2.601251249999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf32 3.21149887597 0 66.988333075 2.132500387499995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf33 2.8343718824 0 66.82916715 2.3712492749999896 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf34 3.08278073167 0 66.5074993 2.8537510499999854 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 36 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 36 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf35 2.91202046297 0 66.9716666 2.157500099999986 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf36 2.42443446654 0 67.06583345 2.0162498249999956 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf37 3.21149887597 0 67.28166755 1.692498675000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf38 3.08955854928 0 67.415000325 1.4924995125000038 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf39 2.47459984699 0 67.189167225 1.8312491624999865 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf40 2.77230336463 0 67.3574997 1.578750449999987 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf41 2.87767186701 0 67.3449999 1.5975001499999877 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf42 2.80383297172 0 67.524166275 1.3287505874999965 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf43 2.65006901783 0 67.285000875 1.6874986875000033 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf44 2.24635069838 0 68.138333525 0.6716664750000035 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf45 2.34501496091 0 67.9958338 0.6212492999999952 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 6 12 promise swing_level 5 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf46 2.06842784342 0 68.5124997 0.29750029999999017 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 promise swing_level 3 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf47 2.47303205644 0 67.769166625 0.9612500624999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf48 2.25695785916 0 68.744167125 0.06583287499999246 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf49 2.40201454134 0 68.289999575 0.5200004250000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 3 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 promise swing_level 3 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf50 3.04549984533 0 67.280834375 1.6937484374999983 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf51 2.4472342416 0 68.6458321 0.1641678999999897 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf52 3.04549984533 0 67.36500035 1.567499474999991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf53 2.28717191157 0 68.5566669 0.2533331000000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf54 2.58589275895 0 67.32000045 1.6349993250000026 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf55 2.30925343798 0 68.674999775 0.1350002249999932 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf56 2.65829057909 0 67.15416635 1.8837504749999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf57 2.17310490806 0 68.90083285 0.20916714999999614 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf58 2.60048816032 0 67.16583345 1.8662498250000041 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 7 5 promise swing_level 3 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf59 3.00191034265 0 67.5566668 1.2799997999999917 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 3 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf60 2.34548464978 0 68.5250008 0.2849991999999958 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf61 2.47150971901 0 67.272500225 1.7062496624999923 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 7 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 36 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 36 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf62 2.32338025859 0 68.69833395 0.11166604999999097 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf63 2.69422211471 0 67.3750006 1.5524990999999844 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 5 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf64 2.99066612796 0 67.264999975 1.7175000375000025 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf65 3.06069007119 0 67.3383319 1.6075021499999949 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 5 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf66 2.71872102189 0 67.231666525 1.7675002125000034 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 3 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf67 2.32472553286 0 67.830834 0.8687490000000011 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 7 8 promise swing_level 5 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 36 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 36 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf68 3.07011861089 0 67.492499675 1.3762504874999877 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf69 2.24868722525 0 67.808333575 0.9024996374999859 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 6 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf70 2.62646733843 0 67.674999225 1.1025011625000047 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf71 2.7894179168 0 66.743334225 2.4999986624999977 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf72 2.30414173782 0 68.115832125 0.6941678749999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf73 2.15411706376 0 68.3475005 0.4624995000000013 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf74 2.39863199297 0 68.4599999 0.3500000999999969 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf75 2.32973593459 0 68.539166125 0.27083387500000244 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 35 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 35 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf76 2.40057835638 0 68.387499575 0.42250042499998985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 35 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 35 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf77 2.01952805493 0 68.548332925 0.26166707499999975 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 35 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 35 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf78 2.34411524951 0 68.360833975 0.44916602499999103 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 35 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 35 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf79 2.43975624164 0 68.1208338 0.6891661999999968 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 35 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 35 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf80 2.28150897566 0 67.865000675 0.8174989874999881 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 promise swing_level 3 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 promise swing_level 3 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf81 2.21393254817 0 67.914167425 0.7437488624999915 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 35 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 35 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf82 2.22691391284 0 67.674165925 1.1037511124999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 5 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 promise swing_level 3 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf83 2.39073563264 0 67.217500525 1.7887492124999866 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 5 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf84 2.54486942683 0 67.044167125 2.048749312499993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf85 2.7027723412 0 66.798332725 2.417500912500003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf86 2.43405117497 0 66.9374996 2.208750600000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf87 2.46539581098 0 66.924166125 2.228750812499996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 7 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf88 2.37626259057 0 67.395833375 1.5212499375000021 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 6 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf89 2.32815340092 0 66.822499375 2.381250937499985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 10 promise swing_level 5 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf90 2.52793060544 0 66.88916635 2.281250475 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 5 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf91 2.28894152305 0 67.14416685 1.8987497249999876 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 10 promise swing_level 5 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf92 2.44523279309 0 67.788333875 0.9324991874999853 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 5 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf93 2.24153127081 0 68.37083435 0.4391656500000011 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf94 2.22650814617 0 68.326666225 0.48333377499999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf95 2.06076867158 0 68.91749975 0.192500249999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf96 2.19302924138 0 68.1941669 0.6158330999999976 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf97 2.23337310779 0 68.518332875 0.2916671249999979 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf98 2.02534949828 0 68.6783341 0.1316658999999959 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf99 2.21332231206 0 68.2274996 0.582500399999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf100 2.25931651565 0 68.43000015 0.3799998499999987 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf101 2.06949400593 0 68.44333345 0.36666654999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf102 2.2004984668 0 68.4958321 0.3141678999999954 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 10 promise swing_level 5 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf103 2.2765192337 0 68.1775008 0.6324991999999924 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf104 2.00397602852 0 68.406666575 0.4033334249999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf105 2.2011016511 0 68.231666425 0.5783335749999964 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf106 2.04108810645 0 68.85000005 0.25999994999999104 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf107 2.35101762092 0 68.11166715 0.6983328499999942 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 5 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf108 2.68065370808 0 67.47833325 1.3975001249999863 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 5 6 promise swing_level 6 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf109 2.23942369165 0 68.078333675 0.4974994875000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 6 7 promise swing_level 5 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf110 2.20312041259 0 68.149166525 0.6608334749999983 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 6 7 promise swing_level 5 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf111 2.18700517491 0 67.6583336 1.127499599999986 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 6 7 promise swing_level 5 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf112 2.26788525296 0 67.54250085 1.3012487250000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf113 2.24973124449 0 66.742500475 2.501249287499995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf114 2.52339118361 0 67.78916605 0.9312509249999863 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 11 promise swing_level 6 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf115 2.40176821617 0 67.24749985 1.7437502249999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 6 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf116 2.28782355526 0 68.034167075 0.5637493874999961 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf117 2.7633082884 0 66.579166775 2.746249837499988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 11 promise swing_level 6 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf118 2.53595978167 0 67.11583195 1.9412520749999942 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf119 2.51913896707 0 67.1366663 1.9100005499999924 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf120 2.30546481917 0 66.799166725 2.4162499124999854 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf121 2.3788370729 0 66.62500035 2.6774994750000047 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf122 2.48743423075 0 66.89666655 2.2700001749999856 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 6 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf123 2.35982183868 0 67.19249935 1.8262509749999865 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf124 2.23626974357 0 66.73666635 2.5100004750000053 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf125 2.34803207116 0 67.3541677 1.5837484499999874 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 5 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf126 2.04807891023 0 68.30416755 0.5058324499999941 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 35 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 35 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf127 2.24721285034 0 68.2241665 0.5858335000000011 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 promise swing_level 3 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf128 2.30282017421 0 68.010834125 0.5987488124999913 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf129 2.32046444592 0 68.08666685 0.48499972499999444 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf130 2.02243458745 0 68.274166825 0.5358331750000019 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf131 2.35705109044 0 68.0874995 0.48375074999998446 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf132 2.10798906805 0 68.2658333 0.5441666999999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf133 2.55439897409 0 68.284166075 0.5258339249999949 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf134 2.5942949653 0 68.4691676 0.3408323999999908 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf135 2.04599154054 0 68.0675001 0.5137498499999893 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf136 2.0910504708 0 68.103334025 0.45999896250000205 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf137 2.65653169868 0 68.295832825 0.5141671749999915 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf138 2.66838617461 0 68.310833 0.49916699999999425 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf139 2.31686815427 0 68.097498575 0.46875213749999034 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf140 2.28150897566 0 68.30500005 0.5049999499999928 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf141 2.06220517127 0 68.068333625 0.512499562500004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf142 2.55115278322 0 68.315833275 0.4941667249999938 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf143 2.27891897473 0 67.860833825 0.8237492624999945 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf144 2.4269048594 0 68.1408337 0.6691662999999949 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf145 2.37201084 0 68.07166535 0.5075019749999896 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf146 2.07901611672 0 67.745833725 0.9962494124999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 7 12 promise swing_level 7 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf147 2.57169557742 0 67.42833435 1.4724984749999948 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 7 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf148 2.61671022512 0 67.271667075 1.7074993875000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf149 2.38691840733 0 67.89333325 0.7750001249999983 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 promise swing_level 3 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf150 2.68726660865 0 67.38166645 1.542500324999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf151 2.75313023424 0 66.829166725 2.371249912500005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 5 promise swing_level 3 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 6 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf152 2.44421203896 0 67.06583355 2.0162496750000045 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 5 11 promise swing_level 5 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf153 2.71315690515 0 66.928333075 2.2225003874999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 5 11 promise swing_level 5 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf154 2.45424375541 0 67.014166425 2.0937503624999962 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 5 11 promise swing_level 5 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf155 2.72056324543 0 66.601666475 2.712500287499992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 5 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf156 2.35078164361 0 68.203332725 0.606667275000001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 5 11 promise swing_level 5 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf157 2.70212269731 0 67.020000425 2.0849993624999854 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 5 11 promise swing_level 7 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf158 2.51463103659 0 67.4966656 1.3700015999999948 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf159 2.52738501267 0 67.199165975 1.8162510374999954 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 6 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf160 2.43823227722 0 66.995833975 2.1212490375 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 5 11 promise swing_level 6 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf161 2.74554576198 0 66.83416645 2.363750324999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 5 11 promise swing_level 5 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf162 2.67193777319 0 66.870833 2.308750499999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 5 promise swing_level 3 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 5 11 promise swing_level 5 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf163 2.73800296314 0 66.894998975 2.2725015375000055 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 5 11 promise swing_level 6 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf164 2.7267661543 0 66.8225006 2.381249099999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 5 11 promise swing_level 5 12 promise swing_level 7 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf165 2.76075672626 0 66.848332825 2.342500762499995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 5 promise swing_level 3 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf166 2.8448150698 0 66.78249995 2.441250074999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 5 promise swing_level 3 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf167 2.57972144715 0 67.78749965 0.9337505249999936 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 5 11 promise swing_level 5 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf168 2.77356187634 0 67.117499425 1.9387508624999867 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 5 promise swing_level 3 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf169 2.6135468544 0 66.61499985 2.692500224999989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 5 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf170 2.67432772771 0 66.898333575 2.267499637500002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 5 promise swing_level 3 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 36 add fp32 1 relu fp32 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 36 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf171 2.74176917487 0 66.805833825 2.4062492625000047 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 5 5 promise swing_level 3 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 7 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf172 2.41315171099 0 68.26083435 0.5491656500000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 10 promise swing_level 5 11 promise swing_level 5 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf173 2.46435815706 0 67.1166663 1.9400005499999864 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf174 2.69758391979 0 68.110001175 0.699998825000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf175 2.38691840733 0 68.196666125 0.6133338750000036 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf176 2.24383986282 0 68.36916695 0.4408330500000034 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf177 2.19765933076 0 68.422499625 0.38750037499999623 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 promise swing_level 3 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 promise swing_level 3 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf178 2.72214428244 0 68.459166525 0.35083347499999606 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf179 2.33224924325 0 68.318333925 0.49166607499999204 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 35 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 35 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf180 2.80612856197 0 68.07750055 0.4987491750000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf181 2.33884822207 0 68.548332525 0.2616674749999902 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf182 2.35334059034 0 68.57583365 0.23416634999998964 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 promise swing_level 3 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf183 2.35728832818 0 68.34583225 0.46416774999999577 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf184 2.44975134102 0 68.19499915 0.6150008499999956 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf185 2.14794338234 0 68.286666825 0.5233331749999991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 promise swing_level 3 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 promise swing_level 3 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf186 2.03138077629 0 68.021665975 0.5825010374999877 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 35 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 35 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf187 2.37201084 0 68.04250045 0.5512493249999864 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf188 2.43405117497 0 68.443333375 0.366666625000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 promise swing_level 3 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 promise swing_level 3 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf189 2.62188541175 0 68.156666 0.6533339999999953 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf190 2.57499429156 0 68.14250085 0.6674991499999919 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf191 2.63165658722 0 67.46333265 1.4200010249999977 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 7 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf192 2.51755935406 0 67.6716662 1.1075006999999886 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 3 8 promise swing_level 7 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf193 2.8270161547 0 67.55666625 1.2800006249999853 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 5 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf194 2.57169557742 0 67.467499925 1.4137501124999972 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf195 2.42272071258 0 68.200000575 0.6099994249999924 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf196 2.78748129684 0 67.642499575 1.1512506374999916 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf197 2.66332880981 0 68.112499175 0.6975008250000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf198 2.85400169936 0 67.40249935 1.5112509749999958 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf199 2.56068287183 0 67.581666375 1.2425004374999986 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 7 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf200 2.74399101335 0 67.688332775 1.0825008374999854 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 6 7 promise swing_level 3 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf201 2.8270161547 0 67.56416655 1.268750175000001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf202 2.70672890918 0 67.71833325 1.037500124999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 6 7 promise swing_level 3 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf203 2.73113831736 0 67.914167075 0.743749387500003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf204 2.53893801437 0 67.8308334 0.8687498999999903 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 7 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf205 2.84044483467 0 67.355833275 1.5812500875000026 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf206 2.74903695156 0 67.823333325 0.8800000125000054 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf207 2.57641061818 0 67.9416664 0.702500399999991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf208 2.68726660865 0 67.3033335 1.6599997500000043 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf209 2.77099135212 0 66.701666525 2.562500212500005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf210 2.91982580408 0 66.97000005 2.159999925000001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf211 2.97799042547 0 66.63000055 2.6699991749999867 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 5 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf212 2.75820987186 0 67.719166025 1.0362509624999845 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf213 2.62330483292 0 66.806666125 2.405000812499985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 5 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf214 2.5009809531 0 67.222500225 1.781249662499988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf215 2.96323309553 0 66.7391666 2.5062500999999884 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf216 2.43278700387 0 68.090834075 0.47874888749998945 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf217 2.78389184203 0 66.804166125 2.408750812500003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf218 2.97799042547 0 66.671666175 2.607500737499997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf219 2.74554576198 0 66.648332575 2.6425011374999983 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf220 2.46435815706 0 67.119166925 1.9362496124999922 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf221 2.97799042547 0 66.9541663 2.183750549999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf222 3.0079504835 0 66.691667 2.577499500000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf223 2.12997178948 0 68.089165825 0.4812512625000025 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 7 promise swing_level 7 8 promise swing_level 6 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf224 2.25059599383 0 68.511666475 0.2983335249999982 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 35 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 32 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 35 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 32 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf225 2.43379823566 0 68.164166025 0.6458339749999965 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 34 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 34 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf226 2.26164370678 0 68.23999995 0.5700000499999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 -6 gpu conv samp 32 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +6 gpu conv samp 32 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf227 2.39480993653 0 68.24666695 0.5633330499999915 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf228 2.25674038528 0 68.42666705 0.38333294999999057 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf229 2.33108858115 0 68.3566658 0.4533341999999948 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf230 2.37602151851 0 68.398333 0.4116670000000028 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 35 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 35 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf231 2.43379823566 0 68.19166635 0.6183336499999911 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 35 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 35 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf232 1.99328791119 0 67.90249825 0.7612526250000045 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 5 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf233 2.37201084 0 68.06000045 0.5249993249999889 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf234 2.87179095196 0 67.336666075 1.6100008875 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 5 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf235 2.56395337649 0 67.940833225 0.7037501624999862 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 7 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf236 2.69086867835 0 68.08166695 0.4924995749999965 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf237 2.61213797048 0 67.39833245 1.5175013249999978 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf238 2.63981371478 0 67.51000045 1.3499993249999847 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf239 2.77454759585 0 67.6616658 1.1225013000000033 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf240 2.86903521746 0 67.2466673 1.744999049999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 7 8 promise swing_level 6 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf241 2.82431725843 0 67.699165675 1.06625148749999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf242 2.76553797403 0 67.84500005 0.847499925000001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf243 2.66686694961 0 68.373332925 0.4366670749999969 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf244 2.63165658722 0 67.32166615 1.632500774999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf245 2.6201255072 0 68.32666685 0.48333315000000143 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf246 2.80444849304 0 67.76583295 0.9662505749999895 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf247 2.68726660865 0 67.57166695 1.2574995750000042 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf248 2.79791540183 0 67.576667675 1.249998487500001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 6 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf249 2.63981371478 0 67.9075001 0.7537498500000055 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf250 2.52820349017 0 67.290833025 1.6787504624999983 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 5 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf251 2.36144773265 0 67.116666 1.9400010000000023 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 36 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 36 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf252 2.56395337649 0 67.806666 0.9050009999999844 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 7 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf253 2.77838761922 0 67.79916685 0.9162497249999859 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf254 2.85806496463 0 67.752499175 0.9862512374999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf255 2.68726660865 0 67.329999325 1.6200010124999906 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf256 2.7894179168 0 68.21083385 0.5991661499999964 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf257 2.3429427499 0 67.4608339 1.423749149999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 7 promise swing_level 3 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf258 2.33083725661 0 67.0258343 2.0762485499999954 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 36 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 36 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf259 2.37341303641 0 67.9441676 0.6987485999999947 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 10 promise swing_level 5 11 promise swing_level 3 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf260 2.37622240849 0 67.0383343 2.057498549999991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 -12 gpu conv perf 21 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf261 2.54215317493 0 66.5350008 2.812498799999986 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 7 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf262 2.37622240849 0 67.428333225 1.4725001624999905 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 7 11 promise swing_level 3 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf263 2.46435815706 0 67.113333525 1.9449997124999925 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf264 2.31686815427 0 68.444167025 0.3658329749999979 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf265 2.9515631192 0 68.09083345 0.4787498249999871 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf266 2.8327149358 0 68.0783337 0.4974994499999923 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 7 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf267 2.87252475149 0 68.15833315 0.6516668499999924 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf268 2.71688129157 0 68.427500375 0.3824996250000027 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf269 2.37602151851 0 68.38166815 0.42833185000000074 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 35 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 35 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf270 2.89441634208 0 68.2133343 0.596665699999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf271 2.24299819745 0 68.0058332 0.6062501999999981 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 promise swing_level 3 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf272 2.66838617461 0 68.44833375 0.3616662499999933 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf273 2.52956879803 0 68.377500375 0.43249962499999983 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf274 2.6924153803 0 68.112499825 0.6975001749999962 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf275 2.70459300704 0 68.1349992 0.6750008000000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 7 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf276 2.98844009626 0 68.09833335 0.4674999749999884 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf277 2.78651399491 0 68.420000075 0.38999992499999225 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf278 2.4114952476 0 68.365833375 0.4441666250000026 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf279 2.37201084 0 68.103332125 0.4600018125000034 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf280 2.73552452381 0 68.215832475 0.5941675249999975 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf281 2.9515631192 0 68.074999775 0.5025003375000026 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf282 2.63894616846 0 68.11999985 0.6900001499999974 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf283 2.35728832818 0 67.95166605 0.6875009249999948 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf284 2.37201084 0 68.10499975 0.45750037499998797 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf285 2.62391712649 0 68.1241669 0.6858330999999908 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf286 2.89441634208 0 68.169165775 0.6408342250000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf287 2.61585779532 0 68.303333875 0.5066661249999896 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 7 12 promise swing_level 3 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf288 2.79496597392 0 67.429167475 1.471248787499995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf289 2.63832684154 0 67.27499945 1.7025008250000013 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 7 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf290 2.95078838814 0 67.2074994 1.80375089999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf291 2.44123970122 0 67.852499575 0.836250637500001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 promise swing_level 3 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf292 2.40201454134 0 67.65 1.1399999999999864 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 promise swing_level 3 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf293 2.65031892351 0 67.871666125 0.8075008124999883 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 6 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf294 2.72584052477 0 68.68250065 0.12749935000000223 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf295 2.71530636044 0 67.6008325 1.2137512500000014 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf296 2.60217358754 0 67.502498975 1.361251537500003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 7 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf297 2.68603383232 0 67.761667075 0.9724993874999868 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf298 2.6671706562 0 67.214167125 1.7937493124999904 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 6 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf299 2.87973588171 0 67.665832575 1.1162511375000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf300 2.77290511901 0 67.469167475 1.4112487874999857 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf301 2.69784286772 0 67.770833975 0.9587490374999916 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 7 7 promise swing_level 5 8 promise swing_level 7 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf302 2.81005656196 0 68.621666875 0.1883331249999941 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf303 2.73392791052 0 67.5833338 1.2399992999999867 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf304 2.66808219115 0 67.44249955 1.4512506750000043 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf305 2.76553797403 0 67.71166705 1.047499424999991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf306 2.63832684154 0 67.58333245 1.2400013249999944 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 7 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf307 2.64328961165 0 67.7616663 0.9725005499999924 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 7 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf308 2.598804915 0 67.261665975 1.7225010374999954 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf309 2.74554576198 0 67.51333375 1.3449993749999933 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf310 2.5162069759 0 67.771666025 0.9575009624999922 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 7 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf311 2.86593402479 0 67.42416675 1.4787498749999983 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf312 2.69603127555 0 67.569999475 1.2600007874999903 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf313 2.82531098889 0 67.73583315 1.0112502749999877 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf314 2.34548464978 0 67.497500225 1.3687496625000009 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf315 2.63659427168 0 67.012499475 2.096250787499997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf316 2.68393045393 0 67.2825005 1.6912492499999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf317 2.67193777319 0 67.39333325 1.5250001249999983 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf318 2.83671777233 0 66.8116663 2.3975005499999966 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf319 2.18060798012 0 67.0625002 2.0212496999999914 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf320 2.61014868996 0 67.20416645 1.8087503249999912 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf321 2.66005179014 0 66.958334 2.1774990000000045 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf322 2.60700115443 0 67.239167175 1.7562492374999863 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf323 2.83671777233 0 67.2816676 1.6924985999999862 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf324 2.6250199755 0 67.5316668 1.3174998000000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf325 2.75820987186 0 67.05083325 2.038750125 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf326 2.60869503736 0 66.83833275 2.3575008749999853 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf327 2.97799042547 0 66.81000065 2.3999990249999854 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf328 2.64155051977 0 66.696666425 2.5700003624999894 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf329 2.56864004444 0 67.3250001 1.6274998499999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf330 2.42858256761 0 67.875000775 0.8024988374999893 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 7 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf331 2.51013921093 0 66.80416755 2.408748674999991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 7 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf332 2.96911844158 0 66.915000675 2.2424989874999923 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf333 2.95482143693 0 66.87249935 2.3062509749999975 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf334 2.68393045393 0 67.3308334 1.6187498999999903 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf335 2.86388974027 0 66.7666666 2.4650001000000046 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf336 2.70459300704 0 66.9408324 2.2037513999999874 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf337 2.6201255072 0 66.821668275 2.382497587500005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf338 2.46435815706 0 67.154999775 1.8825003375000051 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf339 2.88600499928 0 66.90416605 2.258750924999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf340 2.86388974027 0 67.17333305 1.855000425 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf341 2.52952326401 0 66.80916665 2.401250025000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 31 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 31 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 5 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf342 2.40105005625 0 68.099999 0.4650014999999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 35 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 35 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf343 2.26423105029 0 68.48583275 0.32416724999999647 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 promise swing_level 3 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf344 2.41830124015 0 68.591667425 0.21833257499999947 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 35 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 35 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf345 2.50834697069 0 68.194166975 0.6158330249999949 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf346 2.12276445811 0 67.951666675 0.6874999874999972 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 35 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 promise swing_level 3 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 35 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf347 2.49498631275 0 67.965834025 0.666248962499985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf348 2.57641061818 0 68.42250005 0.38749995000000015 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf349 2.55877116226 0 68.15833335 0.65166664999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf350 2.37201084 0 68.203333125 0.6066668749999963 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 promise swing_level 3 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 7 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf351 2.23879932414 0 68.045833175 0.5462502374999971 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 promise swing_level 3 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 7 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 35 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 35 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf352 2.35334059034 0 68.3741669 0.43583309999999076 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 promise swing_level 3 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 35 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 promise swing_level 3 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 35 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf353 2.47407702932 0 68.192499575 0.6175004249999972 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 35 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 35 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf354 2.43405117497 0 68.37416625 0.4358337499999948 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 promise swing_level 3 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 22 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 promise swing_level 3 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 22 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf355 2.41025439167 0 68.07666665 0.5000000249999843 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 35 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 35 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf356 2.49162417805 0 68.2083328 0.6016672000000028 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf357 2.46388285753 0 68.27083355 0.5391664499999905 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 35 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 35 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf358 2.46998925466 0 68.320832525 0.48916747499999647 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf359 2.37201084 0 68.073332775 0.505000837499999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf360 2.35428684105 0 68.187499575 0.6225004249999927 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf361 2.61671022512 0 67.40666675 1.5049998749999958 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf362 2.81766312925 0 67.270000275 1.7099995874999934 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf363 2.81766312925 0 67.518333775 1.3374993374999917 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 5 9 promise swing_level 7 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf364 2.41584832296 0 67.344999925 1.5975001125000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 7 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf365 2.77454759585 0 67.6066669 1.2049996500000049 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf366 2.31305452911 0 68.125000275 0.6849997249999916 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf367 2.50700446917 0 67.389166675 1.5312499874999972 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 5 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf368 2.677027917 0 68.094999875 0.4725001874999961 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf369 2.69572096115 0 67.214166625 1.7937500624999885 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 5 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf370 2.81766312925 0 67.5283323 1.3225015499999913 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 7 8 promise swing_level 5 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf371 2.95699764831 0 67.401666675 1.512499987499993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 5 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf372 2.94244713473 0 67.469166325 1.41125051249999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 5 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf373 2.6848278343 0 67.49083285 1.378750724999989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 5 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 12 promise swing_level 7 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf374 2.49653771764 0 67.6574999 1.1287501499999877 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 7 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf375 2.7326785313 0 67.45000095 1.43999857499999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 8 promise swing_level 7 9 promise swing_level 3 -10 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf376 2.38691840733 0 68.324999375 0.4850006249999922 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf377 2.68726660865 0 67.2974997 1.6687504499999903 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf378 2.98653470492 0 67.2541669 1.733749649999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf379 2.61213797048 0 67.379166475 1.546250287499987 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf380 2.68726660865 0 67.303333325 1.6600000124999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf381 2.4010295437 0 67.178333475 1.8474997874999914 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf382 2.74238450459 0 66.757499875 2.4787501875000046 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf383 2.33108858115 0 67.48583315 1.3862502749999877 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 36 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 36 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf384 3.08156388144 0 66.6666662 2.615000700000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf385 2.7913572296 0 67.2850007 1.6874989499999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf386 2.16698913856 0 68.670834575 0.13916542499999596 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf387 2.55490984105 0 67.294165775 1.6737513375000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf388 2.87281837631 0 66.972499825 2.156250262499995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf389 2.39934870692 0 66.9849998 2.1375002999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf390 2.50566340393 0 67.360833225 1.573750162500005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf391 2.48510279932 0 67.416666775 1.489999837500001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf392 2.26058860457 0 68.000833275 0.6137500874999873 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 10 promise swing_level 5 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf393 2.78844927057 0 66.92750075 2.2237488750000054 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf394 2.8024909535 0 66.9708332 2.158750199999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf395 2.81202468864 0 66.72666665 2.525000024999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf396 2.61671022512 0 67.1774995 1.8487507500000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf397 2.5225758185 0 67.3508333 1.5887500499999874 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf398 2.26208058895 0 67.62333415 1.1799987749999872 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf399 2.4000658493 0 68.085000225 0.4874996624999923 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf400 2.66686694961 0 67.180833525 1.8437497124999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf401 2.82531098889 0 66.7841669 2.438749649999991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 -7 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf402 2.45929055682 0 67.12916695 1.9212495749999974 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 7 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf403 2.46435815706 0 67.0841661 1.9887508499999882 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf404 2.04718380192 0 68.3308331 0.47916689999998996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf405 2.95886552531 0 68.095000125 0.47249981249999706 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf406 2.96473351791 0 68.1433336 0.6666663999999912 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf407 2.58308827743 0 68.051666925 0.5374996124999853 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf408 2.82735743488 0 67.9850007 0.6374989499999941 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf409 2.53276025822 0 68.106667025 0.45499946250000534 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 5 promise swing_level 5 6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf410 2.86944531077 0 68.24083315 0.5691668499999963 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf411 2.59539701657 0 67.9800004 0.6449994000000032 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv fp16 1 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv fp16 1 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf412 2.78554736409 0 68.199999575 0.6100004249999899 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf413 2.82735743488 0 68.08666645 0.48500032500000145 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf414 2.47303205644 0 68.31666655 0.49333345000000295 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf415 2.34251312722 0 68.362500575 0.44749942499999806 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf416 2.95886552531 0 68.080833475 0.4937497874999863 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf417 2.77230336463 0 68.097500375 0.4687494375000014 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf418 2.37201084 0 68.1208334 0.6891666000000015 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 -6 gpu conv samp 33 add fp32 1 relu fp32 1 -7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 +6 gpu conv samp 33 add fp16 1 relu fp16 1 +7 promise swing_level 3 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf419 2.82468627671 0 68.095000175 0.4724997375000015 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf420 2.7468337835 0 68.291666025 0.5183339749999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf421 2.92128257937 0 68.1516669 0.6583331000000016 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf422 2.598804915 0 68.0091667 0.6012499500000033 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf423 2.95699764831 0 67.29333385 1.6749992250000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf424 2.81236235863 0 67.680001225 1.0949981624999978 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 6 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf425 2.84446956237 0 67.625833175 1.1762502374999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf426 3.17736982503 0 67.27416715 1.7037492749999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf427 3.21149887597 0 67.275834075 1.701248887499986 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf428 2.66211789351 0 68.308333225 0.5016667749999982 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf429 2.63165658722 0 67.554167025 1.2837494624999977 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf430 2.92231534168 0 67.26916705 1.7112494250000054 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 6 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf431 2.70334428627 0 68.263332925 0.5466670749999963 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf432 2.598804915 0 67.6224995 1.1812507499999896 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf433 3.07448469058 0 67.2341656 1.763751599999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf434 2.98653470492 0 67.4566662 1.4300006999999937 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf435 2.81766312925 0 67.38249995 1.5412500750000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf436 2.784857324 0 67.332500425 1.6162493624999854 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf437 2.96323309553 0 67.520000275 1.3349995874999934 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf438 3.01666779873 0 67.36500075 1.5674988750000054 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 8 promise swing_level 3 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf439 2.79657904102 0 67.37666745 1.5499988249999959 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf440 2.57527743229 0 67.50416575 1.358751374999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf441 3.37129417123 0 67.350000175 1.589999737499987 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 28 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf442 2.68726660865 0 67.319165375 1.636251937499999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf443 3.28200480723 0 67.244999525 1.747500712499999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf444 2.66838617461 0 67.8016667 0.9124999499999973 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 22 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf445 2.44921762504 0 67.1366661 1.9100008499999959 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 12 promise swing_level 7 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf446 2.21637686133 0 68.645833525 0.16416647499999615 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf447 2.35679413678 0 68.235832575 0.5741674249999932 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf448 2.5225758185 0 66.914167 2.2437494999999856 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf449 2.54215317493 0 67.00583275 2.1062508750000006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 5 -8 gpu conv samp 32 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf450 2.45827953674 0 67.141666275 1.9025005874999863 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 12 promise swing_level 6 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf451 2.46435815706 0 67.74916755 0.9912486750000014 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf452 2.44847081324 0 67.185832425 1.8362513624999934 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 12 promise swing_level 5 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf453 2.46435815706 0 67.095834 1.9712490000000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 5 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf454 2.45827953674 0 67.152500375 1.8862494374999912 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 32 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 6 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar100/data/autotuner_data/tuner_promise_confs_batch220_single.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar100/data/autotuner_data/tuner_promise_confs_batch220_single.txt index 56b744f59c..0965282436 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar100/data/autotuner_data/tuner_promise_confs_batch220_single.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar100/data/autotuner_data/tuner_promise_confs_batch220_single.txt @@ -19,5606 +19,5606 @@ conf1 1 0 68.41 0 ----- +++++ conf1 2.51040826788 0 68.224999525 0.5850004749999954 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf2 2.64739797401 0 68.521 0.2889999999999958 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 4 6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf3 2.33585465224 0 68.612999875 0.19700012499999675 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf4 2.42883442392 0 68.56000005 0.2499999499999973 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf5 2.38333565435 0 68.453499475 0.3565005249999956 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf6 2.53919033971 0 68.2105004 0.5994995999999958 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf7 2.47920581413 0 68.293499875 0.5165001250000018 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 7 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf8 2.86526200945 0 68.009501175 0.6007482374999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf9 3.16727211097 0 67.865499375 0.8167509374999966 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf10 2.38620909921 0 68.48299925 0.3270007499999906 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf11 2.66598153365 0 68.4529997 0.35700029999999006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 4 6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf12 2.48021223602 0 68.08249915 0.4912512749999891 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 7 12 promise swing_level 3 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf13 2.2728371771 0 68.5244998 0.28550019999999565 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf14 2.73925721687 0 67.935000175 0.7124997374999964 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 5 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf15 2.39287291206 0 68.564500475 0.24549952499999395 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 4 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf16 2.50935479078 0 68.453000075 0.356999924999991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf17 2.41998790747 0 67.952 0.6869999999999976 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 6 12 promise swing_level 6 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf18 2.55655994279 0 68.269500175 0.540499824999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf19 2.8353727021 0 67.900000575 0.7649991374999843 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf20 2.35239509998 0 68.476500325 0.3334996749999931 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf21 2.97206989009 0 67.907000375 0.7544994375000016 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 7 6 promise swing_level 6 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf22 2.39287291206 0 68.54099995 0.2690000499999968 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf23 2.77742661722 0 68.33700015 0.4729998500000022 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 4 6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf24 2.94136931262 0 67.909 0.7514999999999858 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf25 2.78389184203 0 68.155999625 0.6540003749999898 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 4 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf26 2.61126425455 0 68.14399935 0.6660006499999952 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 6 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf27 2.30614585285 0 68.692999625 0.11700037499999782 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 7 5 promise swing_level 6 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf28 2.3399679193 0 68.44599995 0.3640000499999957 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 4 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf29 2.42784828494 0 68.37700035 0.43299964999999363 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 4 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf30 2.43575989148 0 67.946500375 0.6952494374999958 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 6 12 promise swing_level 6 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf31 2.39287291206 0 68.250000325 0.5599996749999946 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf32 2.71131161663 0 68.227000575 0.5829994249999914 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 4 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 31 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 31 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf33 2.29462272711 0 68.4460001 0.3639998999999904 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 6 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf34 2.55988986521 0 68.39349995 0.4165000499999906 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf35 2.66598153365 0 68.35000015 0.459999849999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 4 6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf36 2.26249942611 0 68.6709999 0.1390000999999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf37 2.66598153365 0 68.438499675 0.37150032499999386 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf38 3.17990432187 0 67.597500225 1.218749662499988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 4 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf39 3.06069007119 0 67.607999825 1.2030002625000051 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 5 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf40 2.91734050457 0 67.7769994 0.9495009000000039 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 3 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf41 2.65768727206 0 68.002000625 0.6119990625000042 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf42 2.46744124794 0 68.461999475 0.3480005249999977 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 4 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf43 2.7913572296 0 67.13350025 1.914749624999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 3 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf44 3.15058446114 0 67.49899975 1.366500375000001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf45 2.63778207427 0 67.881999475 0.7920007874999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 5 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 6 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf46 2.71101085021 0 67.620000775 1.1849988375000038 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 -12 gpu conv samp 36 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv samp 36 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf47 3.18413748054 0 67.567499725 1.2637504124999879 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf48 2.70980845151 0 67.804499625 0.9082505624999868 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf49 3.16727211097 0 67.400499675 1.5142504874999858 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf50 2.83975602467 0 68.014499975 0.5932500374999847 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf51 2.46030240886 0 67.054001125 2.033998312499996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 7 5 promise swing_level 3 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf52 3.10197096633 0 67.46550035 1.416749474999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf53 2.79691301153 0 67.235999675 1.7610004874999916 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf54 2.87767186701 0 67.84250015 0.8512497749999852 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf55 3.22580091231 0 67.41249965 1.4962505249999936 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf56 2.60869503736 0 68.183498925 0.6265010750000016 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 5 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf57 2.7861548823 0 67.135999425 1.9110008625000034 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 6 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf58 3.17736982503 0 67.35200105 1.5869984249999973 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf59 3.06696931653 0 67.778500575 0.9472491375000018 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf60 2.66005179014 0 67.911000025 0.7484999624999844 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 5 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf61 2.74715597774 0 67.2534998 1.7347502999999946 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf62 3.23277667874 0 67.317500325 1.6387495124999987 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf63 3.27123220779 0 67.3209996 1.633500600000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf64 3.19136883455 0 67.49099985 1.3785002250000034 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf65 3.13078994103 0 67.591000325 1.228499512500001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf66 3.05403818775 0 67.33300015 1.6154997749999893 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf67 2.30523789732 0 67.5564998 1.2802502999999987 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 7 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 3 -11 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf68 3.13449118459 0 67.386001 1.5359985000000052 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 4 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf69 2.40226091704 0 67.4169997 1.489500449999987 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf70 3.21841285718 0 67.31149985 1.6477502249999887 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf71 2.70334428627 0 67.23650065 1.7602490250000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 5 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf72 3.15766404713 0 67.505501025 1.3567484624999935 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf73 2.88914980501 0 67.114500075 1.9432498875000022 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf74 2.77099135212 0 67.810999325 0.8985010124999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 5 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf75 2.82900811804 0 67.75949955 0.9757506749999934 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf76 2.54865421072 0 67.660499625 1.124250562499995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf77 2.64358797163 0 67.413999975 1.494000037500001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf78 2.53623440271 0 68.0190003 0.5864995499999921 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf79 2.75631632863 0 67.55549925 1.2817511249999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf80 3.26098407763 0 67.275999925 1.7010001125000045 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf81 3.19136883455 0 67.395499975 1.5217500374999844 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf82 2.42858256761 0 68.3854992 0.42450079999999846 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf83 3.18413748054 0 67.44699995 1.4445000749999863 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf84 3.11773120146 0 67.452000425 1.4369993625000035 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf85 2.52577234538 0 67.312999175 1.6455012374999924 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +10 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf86 2.62301103349 0 67.9765009 0.6502486499999875 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf87 3.20849249875 0 67.4209993 1.4835010499999868 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf88 2.88357691762 0 67.615499625 1.1917505624999976 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf89 3.18413748054 0 67.376499975 1.5502500374999926 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 3 5 promise swing_level 3 6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf90 2.52661249258 0 67.158499925 1.8772501124999934 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 6 promise swing_level 4 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf91 2.67193777319 0 67.257499325 1.7287510124999983 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf92 3.12424692556 0 67.45599985 1.4310002249999982 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 6 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf93 3.15058446114 0 67.3530003 1.5854995499999873 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 3 11 promise swing_level 6 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf94 3.10197096633 0 67.47450045 1.4032493250000044 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf95 3.06971621323 0 67.656500575 1.1302491375000017 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf96 3.21712342549 0 67.3034996 1.6597506000000024 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 7 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf97 3.14934879612 0 67.36999975 1.5600003749999871 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 7 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf98 2.89158728487 0 67.83200015 0.8669997749999965 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 5 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf99 3.15058446114 0 67.428999825 1.4715002624999869 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf100 2.6924153803 0 67.7579998 0.978000300000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf101 3.02967561599 0 67.48050005 1.3942499249999898 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 7 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf102 3.08596259825 0 67.43799995 1.4580000749999869 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 6 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf103 2.98431481695 0 67.743499525 0.9997507124999885 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf104 2.61155542824 0 67.3849999 1.5375001499999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf105 3.17736982503 0 67.345498625 1.5967520624999878 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf106 3.2253566726 0 67.4044994 1.508250899999986 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 6 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf107 2.54865421072 0 67.576999525 1.2495007124999873 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf108 3.15766404713 0 67.258499575 1.727250637499992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf109 3.00721022547 0 67.6505005 1.1392492499999847 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 3 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf110 3.20118342405 0 67.466499725 1.4152504124999865 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf111 2.77646627979 0 68.359501075 0.45049892499999944 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf112 2.70214867707 0 67.787500125 0.9337498125000039 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 4 11 promise swing_level 3 12 promise swing_level 5 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf113 2.68938808892 0 68.502000225 0.3079997749999933 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 4 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf114 2.99928340424 0 67.334499125 1.6132513125000045 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv samp 36 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv samp 36 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf115 2.39912340782 0 67.08899935 1.981500975000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 6 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf116 2.97206989009 0 67.663999475 1.1190007874999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf117 3.06422733148 0 67.5554998 1.2817502999999846 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf118 3.21841285718 0 67.370500125 1.559249812499985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf119 3.09234607698 0 67.5615007 1.2727489500000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf120 2.75820987186 0 67.977000475 0.6494992874999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf121 2.9694948958 0 67.376999375 1.5495009374999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf122 2.9056379369 0 67.859499775 0.8257503374999899 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf123 3.21668157244 0 67.34750025 1.593749625000001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 7 12 promise swing_level 6 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf124 2.86388974027 0 67.15849975 1.8772503749999885 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf125 2.30259377265 0 67.177999675 1.848000487500002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 4 -8 gpu conv samp 31 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 6 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf126 3.20118342405 0 67.2764996 1.700250600000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 4 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf127 3.24460758361 0 67.331000525 1.618499212500005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 7 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf128 3.15766404713 0 67.63150045 1.1677493249999884 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf129 2.76717173232 0 67.8210003 0.8834995500000034 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 5 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf130 3.15766404713 0 67.396000275 1.5209995874999862 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf131 3.0079504835 0 67.4764999 1.400250150000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf132 2.88357691762 0 67.66699965 1.114500525000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf133 2.72272445653 0 67.19349885 1.8247517249999987 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 7 7 promise swing_level 4 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 6 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf134 3.11489774116 0 67.537499825 1.3087502624999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf135 3.11773120146 0 67.527999325 1.3230010125000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf136 2.84245577384 0 67.839000675 0.8564989875000038 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf137 3.28922604424 0 67.2785002 1.6972497000000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf138 2.6710489875 0 67.9570003 0.6794995499999885 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf139 3.2605300953 0 67.368999625 1.5615005624999938 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 6 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf140 2.95482143693 0 67.636500175 1.1602497375000027 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf141 2.81935907333 0 67.648499 1.1422514999999933 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 24 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 24 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf142 3.24329708136 0 67.296999875 1.6695001874999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 5 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf143 2.83671777233 0 68.074999775 0.5025003375000026 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf144 3.1601671916 0 67.5025009 1.3612486499999932 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 4 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf145 3.19093402719 0 67.35399995 1.5840000749999916 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 6 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf146 2.61554131997 0 67.462500225 1.4212496624999957 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 6 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf147 2.82900811804 0 67.409500275 1.500749587499996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 3 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf148 2.82866643927 0 67.8245001 0.8782498500000031 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 6 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf149 3.17736982503 0 67.343500675 1.5997489874999928 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 6 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf150 2.79691301153 0 68.129498825 0.6805011749999977 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf151 2.84245577384 0 67.947499525 0.6937507124999982 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf152 3.19819632002 0 67.4495002 1.4407496999999907 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 6 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf153 2.91734050457 0 67.641499825 1.1527502624999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf154 3.15766404713 0 67.49850045 1.367249325000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf155 3.21841285718 0 67.33550015 1.6117497749999927 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf156 2.67224263589 0 67.32799985 1.6230002249999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf157 2.9056379369 0 67.64949955 1.1407506749999925 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf158 2.8024909535 0 67.97099995 0.658500074999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf159 3.06069007119 0 67.735 1.0124999999999957 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 5 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf160 2.93344852167 0 67.7205002 1.0342496999999895 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 28 add fp32 1 relu fp32 1 +11 gpu conv perf 28 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf161 3.25343417215 0 67.294999975 1.6725000375000008 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf162 2.87767186701 0 67.90750065 0.7537490249999905 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf163 3.26230891516 0 67.394500525 1.523249212499998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 5 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf164 2.58808353227 0 67.547999575 1.2930006374999863 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf165 2.9056379369 0 67.7854994 0.9367508999999856 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf166 2.6497691932 0 67.83150045 0.8677493250000055 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf167 3.15100834096 0 67.340000575 1.6049991374999877 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf168 2.85023899865 0 67.2665013 1.7152480499999925 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf169 2.95956658772 0 67.3390005 1.6064992499999988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 7 11 promise swing_level 3 12 promise swing_level 4 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf170 2.41290309642 0 67.690499325 1.079251012499988 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 3 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf171 2.53166513648 0 68.206999775 0.6030002249999967 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf172 2.44548811485 0 68.387500925 0.42249907499999895 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf173 2.54976408291 0 67.132499675 1.9162504874999868 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 promise swing_level 3 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 promise swing_level 3 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf174 3.25654426801 0 67.348999675 1.5915004874999923 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 4 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf175 2.78163247506 0 67.784500275 0.9382495874999961 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 7 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf176 2.7774540652 0 67.85099975 0.8385003749999953 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf177 3.16602332403 0 67.415000025 1.4924999624999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 7 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf178 2.99928340424 0 67.620000475 1.1849992874999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf179 3.10842091438 0 67.543000075 1.3004998875000027 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf180 3.16434792981 0 67.512499975 1.346250037499999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf181 3.14769099844 0 67.501499575 1.3627506374999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 5 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf182 3.24329708136 0 67.339500025 1.605749962499985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 4 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf183 2.57250768984 0 67.90250055 0.7612491749999961 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 5 11 promise swing_level 4 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf184 2.96104769987 0 67.840500275 0.8542495875 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv perf 22 add fp32 1 relu fp32 1 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf185 2.80775454314 0 67.749499875 0.990750187499998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf186 2.86114914149 0 68.0694998 0.5107502999999909 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf187 2.71688129157 0 67.126000575 1.9259991374999856 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv samp 33 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv samp 33 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf188 2.76363435925 0 67.95000015 0.689999775000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 3 -11 gpu conv perf 22 add fp32 1 relu fp32 1 +11 gpu conv perf 22 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf189 3.11814628663 0 67.44249955 1.4512506750000043 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 6 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf190 2.70610341797 0 67.401500375 1.5127494374999984 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf191 3.25343417215 0 67.38999995 1.5300000749999896 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf192 3.08278073167 0 67.498499825 1.3672502625000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 3 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf193 2.58808353227 0 67.463999775 1.4190003374999876 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 6 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf194 2.51013921093 0 68.267498975 0.5425010250000014 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 7 7 promise swing_level 4 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf195 3.18287536154 0 67.420500475 1.484249287499999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 7 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf196 2.82100042769 0 67.85800115 0.8279982749999846 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf197 3.27123220779 0 67.3775007 1.5487489499999967 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf198 3.14107722411 0 67.4504995 1.4392507499999851 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf199 2.52525037327 0 68.27200055 0.5379994499999953 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf200 2.74522394531 0 67.235499625 1.7617505624999907 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 3 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf201 3.09234607698 0 67.345000125 1.597499812499997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf202 3.0451038749 0 67.457999825 1.4280002624999923 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf203 2.75983498084 0 67.7724999 0.9562501499999954 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 5 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf204 3.1744269774 0 67.52099945 1.3335008249999873 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf205 3.12466374763 0 67.488498875 1.3822516874999877 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf206 3.16057588248 0 67.364000725 1.5689989124999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 6 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf207 2.72584052477 0 67.37149965 1.5577505249999888 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf208 2.81935907333 0 67.757000225 0.9794996624999968 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf209 2.81935907333 0 67.777501125 0.948748312499994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf210 2.43451503363 0 67.679999675 1.0950004874999877 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv samp 31 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp 31 add fp16 1 relu fp16 1 9 promise swing_level 5 10 promise swing_level 6 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf211 3.16057588248 0 67.39099985 1.5285002249999948 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 5 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf212 2.89158728487 0 67.759999425 0.9750008624999893 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 5 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf213 2.14311156573 0 67.195499825 1.8217502624999966 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 5 -8 gpu conv perf 21 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 21 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf214 3.11448352037 0 67.626999675 1.174500487500005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv samp 31 add fp32 1 relu fp32 1 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv samp 31 add fp16 1 relu fp16 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf215 3.10519259098 0 67.68199925 1.0920011249999888 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf216 2.84245577384 0 67.88550015 0.786749774999997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf217 2.39816124262 0 67.08349895 1.9897515749999855 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 -9 gpu conv perf 23 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 +9 gpu conv perf 23 add fp16 1 relu fp16 1 10 promise swing_level 6 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv samp 34 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv samp 34 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf218 2.81800215482 0 67.883499075 0.7897513874999902 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 4 promise swing_level 6 5 promise swing_level 3 6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf219 3.28020444953 0 67.40249985 1.5112502249999977 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf220 3.14230640559 0 67.370000075 1.5599998875000054 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 7 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf221 2.8985955837 0 67.8929999 0.775500149999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf222 2.45099045836 0 68.378000675 0.43199932500000104 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 3 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf223 3.05131926553 0 67.800499525 0.9142507124999852 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv perf 28 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 28 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf224 2.74554576198 0 67.275000375 1.7024994374999878 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf225 2.71038337866 0 67.134000275 1.9139995874999869 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 6 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf226 3.0673709943 0 67.57199855 1.2570021749999896 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 -12 gpu conv perf 24 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +12 gpu conv perf 24 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf227 2.70121372036 0 67.816999825 0.8895002625000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 7 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv perf 22 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 22 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf228 2.77131923881 0 67.895999325 0.7710010124999869 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 22 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 22 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf229 2.7985840613 0 67.70250055 1.0612491750000004 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 25 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 25 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf230 3.02967561599 0 67.493 1.3755000000000024 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 3 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf231 3.16895060392 0 67.461000275 1.4234995874999896 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf232 2.35215884609 0 67.713499875 1.0447501875 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 7 10 promise swing_level 3 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf233 2.42858256761 0 68.1669994 0.643000600000002 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv fp16 1 add fp16 1 relu fp16 1 9 promise swing_level 7 -10 gpu conv samp 33 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv samp 32 add fp32 1 relu fp32 1 -12 gpu conv samp 32 add fp32 1 relu fp32 1 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 33 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 +12 gpu conv samp 32 add fp16 1 relu fp16 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf234 2.9056379369 0 67.550500175 1.2892497375000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf235 3.14107722411 0 67.46599905 1.4160014250000046 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 5 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf236 3.19819632002 0 67.394000175 1.5239997374999916 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf237 3.10156017619 0 67.589499475 1.2307507874999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf238 3.23233051576 0 67.46199975 1.422000374999989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf239 2.95482143693 0 67.741001 1.0034984999999992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf240 2.77646627979 0 67.875500125 0.8017498124999918 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf241 3.083186562 0 67.53550015 1.3117497749999885 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf242 2.74554576198 0 67.2829996 1.6905006 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 1 add fp16 1 relu fp16 1 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 7 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf243 3.05131926553 0 67.79399985 0.9240002249999861 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 10 promise swing_level 3 -11 gpu conv samp 32 add fp32 1 relu fp32 1 +11 gpu conv samp 32 add fp16 1 relu fp16 1 12 promise swing_level 6 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf244 3.15766404713 0 67.518 1.3379999999999939 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 -11 gpu conv perf 23 add fp32 1 relu fp32 1 +11 gpu conv perf 23 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf245 3.01705640785 0 67.372000475 1.5569992875000054 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 7 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf246 3.20505308102 0 67.42799925 1.4730011249999961 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf247 2.43278700387 0 68.305500425 0.5044995749999913 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 3 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf248 3.18413748054 0 67.4080005 1.502999249999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 3 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 10 promise swing_level 3 11 promise swing_level 3 12 promise swing_level 3 -13 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf249 3.05794266129 0 66.70000035 2.5649994750000005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 3 12 promise swing_level 4 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf250 3.08555603683 0 66.78650035 2.435249474999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf251 2.41196089803 0 67.511999925 1.3470001124999982 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -5 gpu conv samp 32 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv samp 32 add fp16 1 relu fp16 1 6 promise swing_level 5 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf252 3.0079504835 0 66.499000375 2.8664994375000035 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 4 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 35 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 35 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf253 2.61786798314 0 67.349499675 1.5907504874999887 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 5 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf254 3.29375547639 0 66.964499625 2.168250562499992 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 4 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 4 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf255 2.94899257744 0 66.64599935 2.6460009749999998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 5 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 4 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf256 3.34863912682 0 66.5320009 2.816998649999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 4 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf257 3.30741893044 0 66.92450045 2.228249325 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 4 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf258 2.45654982577 0 66.53150015 2.817749774999996 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf259 2.44649928694 0 66.703500575 2.559749137499985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 4 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf260 2.70946883647 0 66.585999875 2.7360001874999966 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 6 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 21 add fp16 1 relu fp16 1 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf261 2.90775732059 0 66.801500475 2.4127492874999987 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 6 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 4 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf262 3.03466443177 0 66.980500025 2.144249962499998 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 4 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 4 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf263 2.8024909535 0 66.689999975 2.5800000374999854 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 5 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 4 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf264 2.92522009873 0 66.59600035 2.7209994749999993 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 5 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 25 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 25 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf265 2.90953336708 0 66.73500025 2.5124996249999967 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 4 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 4 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf266 2.77969290041 0 66.95600145 2.1809978249999915 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 4 -7 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +7 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 4 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf267 3.3060572085 0 66.58050055 2.744249175 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 4 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf268 2.82836753804 0 66.747500825 2.4937487624999903 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf269 2.62937915751 0 66.98849965 2.1322505250000034 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf270 2.95847618969 0 67.296499875 1.6702501875000024 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 4 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf271 2.89158728487 0 66.570999725 2.7585004125000054 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 7 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 21 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 21 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 11 promise swing_level 7 12 promise swing_level 5 13 promise swing_level 3 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf272 3.05950724751 0 66.596000125 2.72099981249999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 5 6 promise swing_level 7 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 4 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf273 3.30741893044 0 66.7635002 2.4697497000000013 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 4 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf274 2.83843673884 0 66.785999725 2.4360004125000003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 4 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf275 2.63807919217 0 67.3154999 1.6417501499999858 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 4 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf276 2.61786798314 0 66.643000575 2.6504991374999918 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 5 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf277 2.97688641188 0 66.993499525 2.1247507124999885 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 4 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf278 2.55740906434 0 66.58849945 2.732250824999994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 5 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf279 2.9356086849 0 66.746499425 2.4952508625000007 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 4 -13 gpu conv perf 23 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv perf 23 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf280 2.98804293927 0 66.988500175 2.132249737499997 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 4 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 4 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf281 2.97688641188 0 66.7040005 2.558999249999985 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 4 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf282 2.61786798314 0 67.0555 2.0317500000000024 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 5 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf283 3.09353740765 0 67.00050035 2.114249475000001 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 4 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf284 3.36796114687 0 66.552001 2.7869984999999886 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 4 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 4 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf285 2.84618352747 0 66.6989998 2.5665003000000013 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 5 7 promise swing_level 6 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf286 2.90106400737 0 66.8144994 2.393250899999991 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 -12 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 +12 gpu conv perf 25 add fp16 1 relu fp16 1 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf287 3.0451038749 0 66.551500525 2.787749212500003 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 4 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf288 3.0451038749 0 66.569500375 2.760749437499989 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 5 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 4 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf289 2.84583768758 0 67.2384996 1.757250599999999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 27 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 27 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 4 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 4 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 4 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf290 3.00833684977 0 66.593500925 2.724748612499994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 6 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 4 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf291 2.79205049498 0 66.580500175 2.744249737499999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 7 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 4 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf292 2.73271838747 0 66.661500175 2.622749737499994 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 6 -3 gpu conv fp16 1 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 5 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv perf 24 add fp32 1 relu fp32 1 -10 gpu conv perf 21 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 24 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv perf 24 add fp16 1 relu fp16 1 +10 gpu conv perf 21 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 24 add fp16 1 relu fp16 1 12 promise swing_level 5 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf293 3.09353740765 0 66.762499875 2.47125018749999 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 -2 gpu conv perf 26 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 +2 gpu conv perf 26 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 3 6 promise swing_level 4 7 promise swing_level 3 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 3 13 promise swing_level 7 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf294 3.32581418762 0 66.602000775 2.711998837500005 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 7 6 promise swing_level 4 7 promise swing_level 4 -8 gpu conv perf 25 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 9 promise swing_level 3 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv perf 21 add fp32 1 relu fp32 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf 21 add fp16 1 relu fp16 1 12 promise swing_level 5 -13 gpu conv samp 31 add fp32 1 relu fp32 1 pool_max fp32 1 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +13 gpu conv samp 31 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- +++++ conf295 2.86388974027 0 66.834000225 2.3639996624999995 -1 gpu conv fp16 1 add fp32 1 relu fp32 1 +1 gpu conv fp16 1 add fp16 1 relu fp16 1 2 promise swing_level 3 -3 gpu conv samp 32 add fp32 1 relu fp32 1 -4 gpu conv fp16 1 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv samp 32 add fp16 1 relu fp16 1 +4 gpu conv fp16 1 add fp16 1 relu fp16 1 pool_max fp16 1 5 promise swing_level 4 6 promise swing_level 5 7 promise swing_level 7 -8 gpu conv perf 25 add fp32 1 relu fp32 1 -9 gpu conv samp 31 add fp32 1 relu fp32 1 -10 gpu conv samp 32 add fp32 1 relu fp32 1 pool_max fp32 1 -11 gpu conv fp16 1 add fp32 1 relu fp32 1 +8 gpu conv perf 25 add fp16 1 relu fp16 1 +9 gpu conv samp 31 add fp16 1 relu fp16 1 +10 gpu conv samp 32 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 1 add fp16 1 relu fp16 1 12 promise swing_level 4 13 promise swing_level 4 -14 gpu mul fp16 1 add fp32 1 relu fp32 1 -15 gpu mul fp16 1 add fp32 1 -16 gpu softmax fp32 1 +14 gpu mul fp16 1 add fp16 1 relu fp16 1 +15 gpu mul fp16 1 add fp16 1 +16 gpu softmax fp16 1 ----- -- GitLab